The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.

  utf8.c AOK

     [utf8_to_uv]
     Malformed UTF-8 character
	my $a = ord "\x80" ;

     Malformed UTF-8 character
	my $a = ord "\xf080" ;
     <<<<<< this warning can't be easily triggered from perl anymore

     [utf16_to_utf8]
     Malformed UTF-16 surrogate		
     <<<<<< Add a test when something actually calls utf16_to_utf8

__END__
# utf8.c [utf8_to_uv] -W
BEGIN {
    if (ord('A') == 193) {
        print "SKIPPED\n# ebcdic platforms do not generate Malformed UTF-8 warnings.";
        exit 0;
    }
}
use utf8 ;
my $a = "snøstorm" ;
{
    no warnings 'utf8' ;
    my $a = "snøstorm";
    use warnings 'utf8' ;
    my $a = "snøstorm";
}
EXPECT
Malformed UTF-8 character (unexpected non-continuation byte 0x73, immediately after start byte 0xf8) at - line 9.
Malformed UTF-8 character (unexpected non-continuation byte 0x73, immediately after start byte 0xf8) at - line 14.
########
use warnings 'utf8';
my $d7ff  = uc(chr(0xD7FF));
my $d800  = uc(chr(0xD800));
my $dfff  = uc(chr(0xDFFF));
my $e000  = uc(chr(0xE000));
my $feff  = uc(chr(0xFEFF));
my $fffd  = uc(chr(0xFFFD));
my $fffe  = uc(chr(0xFFFE));
my $ffff  = uc(chr(0xFFFF));
my $hex4  = uc(chr(0x10000));
my $hex5  = uc(chr(0x100000));
my $maxm1 = uc(chr(0x10FFFE));
my $max   = uc(chr(0x10FFFF));
my $nonUnicode =  uc(chr(0x110000));
no warnings 'utf8';
my $d7ff  = uc(chr(0xD7FF));
my $d800  = uc(chr(0xD800));
my $dfff  = uc(chr(0xDFFF));
my $e000  = uc(chr(0xE000));
my $feff  = uc(chr(0xFEFF));
my $fffd  = uc(chr(0xFFFD));
my $fffe  = uc(chr(0xFFFE));
my $ffff  = uc(chr(0xFFFF));
my $hex4  = uc(chr(0x10000));
my $hex5  = uc(chr(0x100000));
my $maxm1 = uc(chr(0x10FFFE));
my $max   = uc(chr(0x10FFFF));
my $nonUnicode =  uc(chr(0x110000));
EXPECT
Operation "uc" returns its argument for UTF-16 surrogate U+D800 at - line 3.
Operation "uc" returns its argument for UTF-16 surrogate U+DFFF at - line 4.
Operation "uc" returns its argument for non-Unicode code point 0x110000 at - line 14.
########
use warnings 'utf8';
my $d800  = uc(chr(0xD800));
my $nonUnicode =  uc(chr(0x110000));
no warnings 'surrogate';
my $d800  = uc(chr(0xD800));
my $nonUnicode =  uc(chr(0x110000));
EXPECT
Operation "uc" returns its argument for UTF-16 surrogate U+D800 at - line 2.
Operation "uc" returns its argument for non-Unicode code point 0x110000 at - line 3.
Operation "uc" returns its argument for non-Unicode code point 0x110000 at - line 6.
########
use warnings 'utf8';
my $d800  = uc(chr(0xD800));
my $nonUnicode =  uc(chr(0x110000));
my $big_nonUnicode = uc(chr(0x8000_0000));
no warnings 'non_unicode';
my $d800  = uc(chr(0xD800));
my $nonUnicode =  uc(chr(0x110000));
my $big_nonUnicode = uc(chr(0x8000_0000));
EXPECT
Operation "uc" returns its argument for UTF-16 surrogate U+D800 at - line 2.
Operation "uc" returns its argument for non-Unicode code point 0x110000 at - line 3.
Operation "uc" returns its argument for non-Unicode code point 0x80000000 at - line 4.
Operation "uc" returns its argument for UTF-16 surrogate U+D800 at - line 6.
########
use warnings 'utf8';
my $d7ff  = lc pack("U", 0xD7FF);
my $d800  = lc pack("U", 0xD800);
my $dfff  = lc pack("U", 0xDFFF);
my $e000  = lc pack("U", 0xE000);
my $feff  = lc pack("U", 0xFEFF);
my $fffd  = lc pack("U", 0xFFFD);
my $fffe  = lc pack("U", 0xFFFE);
my $ffff  = lc pack("U", 0xFFFF);
my $hex4  = lc pack("U", 0x10000);
my $hex5  = lc pack("U", 0x100000);
my $maxm1 = lc pack("U", 0x10FFFE);
my $max   = lc pack("U", 0x10FFFF);
my $nonUnicode =  lc(pack("U", 0x110000));
no warnings 'utf8';
my $d7ff  = lc pack("U", 0xD7FF);
my $d800  = lc pack("U", 0xD800);
my $dfff  = lc pack("U", 0xDFFF);
my $e000  = lc pack("U", 0xE000);
my $feff  = lc pack("U", 0xFEFF);
my $fffd  = lc pack("U", 0xFFFD);
my $fffe  = lc pack("U", 0xFFFE);
my $ffff  = lc pack("U", 0xFFFF);
my $hex4  = lc pack("U", 0x10000);
my $hex5  = lc pack("U", 0x100000);
my $maxm1 = lc pack("U", 0x10FFFE);
my $max   = lc pack("U", 0x10FFFF);
my $nonUnicode =  lc(pack("U", 0x110000));
EXPECT
Operation "lc" returns its argument for UTF-16 surrogate U+D800 at - line 3.
Operation "lc" returns its argument for UTF-16 surrogate U+DFFF at - line 4.
Operation "lc" returns its argument for non-Unicode code point 0x110000 at - line 14.
########
use warnings 'utf8';
my $d7ff  = ucfirst "\x{D7FF}";
my $d800  = ucfirst "\x{D800}";
my $dfff  = ucfirst "\x{DFFF}";
my $e000  = ucfirst "\x{E000}";
my $feff  = ucfirst "\x{FEFF}";
my $fffd  = ucfirst "\x{FFFD}";
my $fffe  = ucfirst "\x{FFFE}";
my $ffff  = ucfirst "\x{FFFF}";
my $hex4  = ucfirst "\x{10000}";
my $hex5  = ucfirst "\x{100000}";
my $maxm1 = ucfirst "\x{10FFFE}";
my $max   = ucfirst "\x{10FFFF}";
my $nonUnicode =  ucfirst "\x{110000}";
no warnings 'utf8';
my $d7ff  = ucfirst "\x{D7FF}";
my $d800  = ucfirst "\x{D800}";
my $dfff  = ucfirst "\x{DFFF}";
my $e000  = ucfirst "\x{E000}";
my $feff  = ucfirst "\x{FEFF}";
my $fffd  = ucfirst "\x{FFFD}";
my $fffe  = ucfirst "\x{FFFE}";
my $ffff  = ucfirst "\x{FFFF}";
my $hex4  = ucfirst "\x{10000}";
my $hex5  = ucfirst "\x{100000}";
my $maxm1 = ucfirst "\x{10FFFE}";
my $max   = ucfirst "\x{10FFFF}";
my $nonUnicode =  ucfirst "\x{110000}";
EXPECT
Operation "ucfirst" returns its argument for UTF-16 surrogate U+D800 at - line 3.
Operation "ucfirst" returns its argument for UTF-16 surrogate U+DFFF at - line 4.
Operation "ucfirst" returns its argument for non-Unicode code point 0x110000 at - line 14.
########
use warnings 'utf8';
chr(0xD7FF) =~ /\p{Any}/;
chr(0xD800) =~ /\p{Any}/;
chr(0xDFFF) =~ /\p{Any}/;
chr(0xE000) =~ /\p{Any}/;
chr(0xFEFF) =~ /\p{Any}/;
chr(0xFFFD) =~ /\p{Any}/;
chr(0xFFFE) =~ /\p{Any}/;
chr(0xFFFF) =~ /\p{Any}/;
chr(0x10000) =~ /\p{Any}/;
chr(0x100000) =~ /\p{Any}/;
chr(0x10FFFE) =~ /\p{Any}/;
chr(0x10FFFF) =~ /\p{Any}/;
chr(0x110000) =~ /\p{Any}/;
no warnings 'utf8';
chr(0xD7FF) =~ /\p{Any}/;
chr(0xD800) =~ /\p{Any}/;
chr(0xDFFF) =~ /\p{Any}/;
chr(0xE000) =~ /\p{Any}/;
chr(0xFEFF) =~ /\p{Any}/;
chr(0xFFFD) =~ /\p{Any}/;
chr(0xFFFE) =~ /\p{Any}/;
chr(0xFFFF) =~ /\p{Any}/;
chr(0x10000) =~ /\p{Any}/;
chr(0x100000) =~ /\p{Any}/;
chr(0x10FFFE) =~ /\p{Any}/;
chr(0x10FFFF) =~ /\p{Any}/;
chr(0x110000) =~ /\p{Any}/;
EXPECT
Code point 0x110000 is not Unicode, no properties match it; all inverse properties do at - line 14.
########
use warnings 'utf8';
chr(0x110000) =~ /\p{Any}/;
no warnings 'non_unicode';
chr(0x110000) =~ /\p{Any}/;
EXPECT
Code point 0x110000 is not Unicode, no properties match it; all inverse properties do at - line 2.
########
require "../test.pl";
use warnings 'utf8';
my $file = tempfile();
open(my $fh, "+>:utf8", $file);
print $fh "\x{D7FF}", "\n";
print $fh "\x{D800}", "\n";
print $fh "\x{DFFF}", "\n";
print $fh "\x{E000}", "\n";
print $fh "\x{FDCF}", "\n";
print $fh "\x{FDD0}", "\n";
print $fh "\x{FDEF}", "\n";
print $fh "\x{FDF0}", "\n";
print $fh "\x{FEFF}", "\n";
print $fh "\x{FFFD}", "\n";
print $fh "\x{FFFE}", "\n";
print $fh "\x{FFFF}", "\n";
print $fh "\x{10000}", "\n";
print $fh "\x{1FFFE}", "\n";
print $fh "\x{1FFFF}", "\n";
print $fh "\x{2FFFE}", "\n";
print $fh "\x{2FFFF}", "\n";
print $fh "\x{3FFFE}", "\n";
print $fh "\x{3FFFF}", "\n";
print $fh "\x{4FFFE}", "\n";
print $fh "\x{4FFFF}", "\n";
print $fh "\x{5FFFE}", "\n";
print $fh "\x{5FFFF}", "\n";
print $fh "\x{6FFFE}", "\n";
print $fh "\x{6FFFF}", "\n";
print $fh "\x{7FFFE}", "\n";
print $fh "\x{7FFFF}", "\n";
print $fh "\x{8FFFE}", "\n";
print $fh "\x{8FFFF}", "\n";
print $fh "\x{9FFFE}", "\n";
print $fh "\x{9FFFF}", "\n";
print $fh "\x{AFFFE}", "\n";
print $fh "\x{AFFFF}", "\n";
print $fh "\x{BFFFE}", "\n";
print $fh "\x{BFFFF}", "\n";
print $fh "\x{CFFFE}", "\n";
print $fh "\x{CFFFF}", "\n";
print $fh "\x{DFFFE}", "\n";
print $fh "\x{DFFFF}", "\n";
print $fh "\x{EFFFE}", "\n";
print $fh "\x{EFFFF}", "\n";
print $fh "\x{FFFFE}", "\n";
print $fh "\x{FFFFF}", "\n";
print $fh "\x{100000}", "\n";
print $fh "\x{10FFFE}", "\n";
print $fh "\x{10FFFF}", "\n";
print $fh "\x{110000}", "\n";
close $fh;
EXPECT
Unicode surrogate U+D800 is illegal in UTF-8 at - line 6.
Unicode surrogate U+DFFF is illegal in UTF-8 at - line 7.
Unicode non-character U+FDD0 is illegal for open interchange at - line 10.
Unicode non-character U+FDEF is illegal for open interchange at - line 11.
Unicode non-character U+FFFE is illegal for open interchange at - line 15.
Unicode non-character U+FFFF is illegal for open interchange at - line 16.
Unicode non-character U+1FFFE is illegal for open interchange at - line 18.
Unicode non-character U+1FFFF is illegal for open interchange at - line 19.
Unicode non-character U+2FFFE is illegal for open interchange at - line 20.
Unicode non-character U+2FFFF is illegal for open interchange at - line 21.
Unicode non-character U+3FFFE is illegal for open interchange at - line 22.
Unicode non-character U+3FFFF is illegal for open interchange at - line 23.
Unicode non-character U+4FFFE is illegal for open interchange at - line 24.
Unicode non-character U+4FFFF is illegal for open interchange at - line 25.
Unicode non-character U+5FFFE is illegal for open interchange at - line 26.
Unicode non-character U+5FFFF is illegal for open interchange at - line 27.
Unicode non-character U+6FFFE is illegal for open interchange at - line 28.
Unicode non-character U+6FFFF is illegal for open interchange at - line 29.
Unicode non-character U+7FFFE is illegal for open interchange at - line 30.
Unicode non-character U+7FFFF is illegal for open interchange at - line 31.
Unicode non-character U+8FFFE is illegal for open interchange at - line 32.
Unicode non-character U+8FFFF is illegal for open interchange at - line 33.
Unicode non-character U+9FFFE is illegal for open interchange at - line 34.
Unicode non-character U+9FFFF is illegal for open interchange at - line 35.
Unicode non-character U+AFFFE is illegal for open interchange at - line 36.
Unicode non-character U+AFFFF is illegal for open interchange at - line 37.
Unicode non-character U+BFFFE is illegal for open interchange at - line 38.
Unicode non-character U+BFFFF is illegal for open interchange at - line 39.
Unicode non-character U+CFFFE is illegal for open interchange at - line 40.
Unicode non-character U+CFFFF is illegal for open interchange at - line 41.
Unicode non-character U+DFFFE is illegal for open interchange at - line 42.
Unicode non-character U+DFFFF is illegal for open interchange at - line 43.
Unicode non-character U+EFFFE is illegal for open interchange at - line 44.
Unicode non-character U+EFFFF is illegal for open interchange at - line 45.
Unicode non-character U+FFFFE is illegal for open interchange at - line 46.
Unicode non-character U+FFFFF is illegal for open interchange at - line 47.
Unicode non-character U+10FFFE is illegal for open interchange at - line 49.
Unicode non-character U+10FFFF is illegal for open interchange at - line 50.
Code point 0x110000 is not Unicode, may not be portable at - line 51.
########
require "../test.pl";
use warnings 'utf8';
my $file = tempfile();
open(my $fh, "+>:utf8", $file);
print $fh "\x{D800}", "\n";
print $fh "\x{FFFF}", "\n";
print $fh "\x{110000}", "\n";
close $fh;
EXPECT
Unicode surrogate U+D800 is illegal in UTF-8 at - line 5.
Unicode non-character U+FFFF is illegal for open interchange at - line 6.
Code point 0x110000 is not Unicode, may not be portable at - line 7.
########
require "../test.pl";
use warnings 'utf8';
no warnings 'surrogate';
my $file = tempfile();
open(my $fh, "+>:utf8", $file);
print $fh "\x{D800}", "\n";
print $fh "\x{FFFF}", "\n";
print $fh "\x{110000}", "\n";
close $fh;
EXPECT
Unicode non-character U+FFFF is illegal for open interchange at - line 7.
Code point 0x110000 is not Unicode, may not be portable at - line 8.
########
require "../test.pl";
use warnings 'utf8';
no warnings 'nonchar';
my $file = tempfile();
open(my $fh, "+>:utf8", $file);
print $fh "\x{D800}", "\n";
print $fh "\x{FFFF}", "\n";
print $fh "\x{110000}", "\n";
close $fh;
EXPECT
Unicode surrogate U+D800 is illegal in UTF-8 at - line 6.
Code point 0x110000 is not Unicode, may not be portable at - line 8.
########
require "../test.pl";
use warnings 'utf8';
no warnings 'non_unicode';
my $file = tempfile();
open(my $fh, "+>:utf8", $file);
print $fh "\x{D800}", "\n";
print $fh "\x{FFFF}", "\n";
print $fh "\x{110000}", "\n";
close $fh;
EXPECT
Unicode surrogate U+D800 is illegal in UTF-8 at - line 6.
Unicode non-character U+FFFF is illegal for open interchange at - line 7.
########
require "../test.pl";
no warnings 'utf8';
my $file = tempfile();
open(my $fh, "+>:utf8", $file);
print $fh "\x{D7FF}", "\n";
print $fh "\x{D800}", "\n";
print $fh "\x{DFFF}", "\n";
print $fh "\x{E000}", "\n";
print $fh "\x{FDCF}", "\n";
print $fh "\x{FDD0}", "\n";
print $fh "\x{FDEF}", "\n";
print $fh "\x{FDF0}", "\n";
print $fh "\x{FEFF}", "\n";
print $fh "\x{FFFD}", "\n";
print $fh "\x{FFFE}", "\n";
print $fh "\x{FFFF}", "\n";
print $fh "\x{10000}", "\n";
print $fh "\x{1FFFE}", "\n";
print $fh "\x{1FFFF}", "\n";
print $fh "\x{2FFFE}", "\n";
print $fh "\x{2FFFF}", "\n";
print $fh "\x{3FFFE}", "\n";
print $fh "\x{3FFFF}", "\n";
print $fh "\x{4FFFE}", "\n";
print $fh "\x{4FFFF}", "\n";
print $fh "\x{5FFFE}", "\n";
print $fh "\x{5FFFF}", "\n";
print $fh "\x{6FFFE}", "\n";
print $fh "\x{6FFFF}", "\n";
print $fh "\x{7FFFE}", "\n";
print $fh "\x{7FFFF}", "\n";
print $fh "\x{8FFFE}", "\n";
print $fh "\x{8FFFF}", "\n";
print $fh "\x{9FFFE}", "\n";
print $fh "\x{9FFFF}", "\n";
print $fh "\x{AFFFE}", "\n";
print $fh "\x{AFFFF}", "\n";
print $fh "\x{BFFFE}", "\n";
print $fh "\x{BFFFF}", "\n";
print $fh "\x{CFFFE}", "\n";
print $fh "\x{CFFFF}", "\n";
print $fh "\x{DFFFE}", "\n";
print $fh "\x{DFFFF}", "\n";
print $fh "\x{EFFFE}", "\n";
print $fh "\x{EFFFF}", "\n";
print $fh "\x{FFFFE}", "\n";
print $fh "\x{FFFFF}", "\n";
print $fh "\x{100000}", "\n";
print $fh "\x{10FFFE}", "\n";
print $fh "\x{10FFFF}", "\n";
print $fh "\x{110000}", "\n";
close $fh;
EXPECT