t/30unicode.t - metacpan.org

#!perl

## Test everything related to Unicode.
## At the moment, this basically means testing the UTF8 client_encoding
## and $dbh->{pg_enable_utf8} bits

use 5.006;
use strict;
use warnings;
use utf8;
use charnames ':full';
use Encode qw(encode_utf8);
use Data::Dumper;
use Test::More;
use lib 't','.';
use open qw/ :std :encoding(utf8) /;
require 'dbdpg_test_setup.pl';
select(($|=1,select(STDERR),$|=1)[1]);

my $dbh = connect_database();

if (! $dbh) {
	plan skip_all => 'Connection to database failed, cannot continue testing';
}

isnt ($dbh, undef, 'Connect to database for unicode testing');


my @tests;

my $server_encoding = $dbh->selectrow_array('SHOW server_encoding');
my $client_encoding = $dbh->selectrow_array('SHOW client_encoding');

# Beware, characters used for testing need to be known to Unicode version 4.0.0,
# which is what perl 5.8.1 shipped with.
foreach (
    [ascii => 'Ada Lovelace'],
    ['latin 1 range' => "\N{LATIN CAPITAL LETTER E WITH ACUTE}milie du Ch\N{LATIN SMALL LETTER A WITH CIRCUMFLEX}telet"],
    # I'm finding it awkward to continue the theme of female mathematicians
    ['base plane' => "Interrobang\N{INTERROBANG}"],
    ['astral plane' => "\N{MUSICAL SYMBOL CRESCENDO}"],
     ) {
    my ($range, $text) = @$_;
    my $name_d = my $name_u = $text;
    utf8::upgrade($name_u);
    # Before 5.12.0 the text to the left of => gets to be SvUTF8() under use utf8;
    # even if it's plain ASCII. This would confuse what we test for below.
    push @tests, (
        [upgraded => $range => 'text' => $name_u],
        [upgraded => $range => 'text[]' => [$name_u]],
    );
    if (utf8::downgrade($name_d, 1)) {
        push @tests, (
            [downgraded => $range => 'text' => $name_d],
            [downgraded => $range => 'text[]' => [$name_d]],
            [mixed => $range => 'text[]' => [$name_d,$name_u]],
        );
    }
}

my %ranges = (
    UTF8 => qr/.*/,
    LATIN1 => qr/\A(?:ascii|latin 1 range)\z/,
);

foreach (@tests) {
    my ($state, $range, $type, $value) = @$_;
 SKIP:
    foreach my $test (
        {
            qtype => 'placeholder',
            sql => "SELECT ?::$type",
            args => [$value],
        },
        (($type eq 'text') ? (
            {
                qtype => 'interpolated',
                sql => "SELECT '$value'::$type",
            },
            # Test that what we send is the same as the database's idea of characters:
            {
                qtype => 'placeholder length',
                sql => "SELECT length(?::$type)",
                args => [$value],
                want => length($value),
            },
            {
                qtype => 'interpolated length',
                sql => "SELECT length('$value'::$type)",
                want => length($value),
            },
        ):()),
    ) {
        skip "Can't do $range tests with server_encoding='$server_encoding'", 1
            if $range !~ ($ranges{$server_encoding} || qr/\A(?:ascii)\z/);

		skip 'Cannot perform range tests if client_encoding is not UTF8', 1
			if $client_encoding ne 'UTF8';

        foreach my $enable_utf8 (1, 0, -1) {
            my $desc = "$state $range UTF-8 $test->{qtype} $type (pg_enable_utf8=$enable_utf8)";
            my @args = @{$test->{args} || []};
            my $want = exists $test->{want} ? $test->{want} : $value;
            if (!$enable_utf8) {
                $want = ref $want ? [ map encode_utf8($_), @{$want} ] ## no critic
                    : encode_utf8($want);
            }

            is(utf8::is_utf8($test->{sql}), ($state eq 'upgraded'), "$desc query has correct flag")
                if $test->{qtype} =~ /^interpolated/;
            if ($state ne 'mixed') {
                foreach my $arg (map { ref($_) ? @{$_} : $_ } @args) { ## no critic
                    is(utf8::is_utf8($arg), ($state eq 'upgraded'), "$desc arg has correct flag")
                }
            }
            $dbh->{pg_enable_utf8} = $enable_utf8;

            ## Skip pg_enable_utf=0 for now
            if (0 == $enable_utf8) {
                if ($range eq 'latin 1 range' or $range eq 'base plane' or $range eq 'astral plane') {
                    pass ("Skipping test of pg_enable_utf=0 with $range");
                    next;
                }
            }


            my $sth = $dbh->prepare($test->{sql});
            eval {
                $sth->execute(@args);
            };
            if ($@) {
                diag "Failure: enable_utf8=$enable_utf8, SQL=$test->{sql}, range=$range\n";
                die $@;
            }
            else {
                my $result = $sth->fetchall_arrayref->[0][0];
                is_deeply ($result, $want, "$desc returns proper value");
                if ($test->{qtype} !~ /length$/) {
                    # Whilst XS code can set SVf_UTF8 on an IV, the core's SV
                    # copying code doesn't copy it. So we can't assume that numeric
                    # values we see "out here" still have it set. Hence skip this
                    # test for the SQL length() tests.
                    is (utf8::is_utf8($_), !!$enable_utf8, "$desc returns string with correct UTF-8 flag")
                        for (ref $result ? @{$result} : $result);
                }
            }
        }
    }
}

my %ord_max = (
    LATIN1 => 255,
    UTF8 => 2**31,
);

# Test that what we get is the same as the database's idea of characters:
for my $name ('LATIN CAPITAL LETTER N',
              'LATIN SMALL LETTER E WITH ACUTE',
              'CURRENCY SIGN',
              # Has a different code point in Unicode, Windows 1252 and ISO-8859-15
              'EURO SIGN',
              'POUND SIGN',
              'YEN SIGN',
              # Has a different code point in Unicode and Windows 1252
              'LATIN CAPITAL LETTER S WITH CARON',
              'SNOWMAN',
              # U+1D196 should be 1 character, not a surrogate pair
              'MUSICAL SYMBOL TR',
          ) {
    my $ord = charnames::vianame($name);
  SKIP:
    foreach my $enable_utf8 (1, 0, -1) {
        my $desc = sprintf "chr(?) for U+%04X $name, \$enable_utf8=$enable_utf8", $ord;
        skip "Pg < 8.3 has broken $desc", 1
            if $ord > 127 && $dbh->{pg_server_version} < 80300;
        skip "Cannot do $desc with server_encoding='$server_encoding'", 1
            if $ord > ($ord_max{$server_encoding} || 127);
        $dbh->{pg_enable_utf8} = $enable_utf8;
         my $sth = $dbh->prepare('SELECT chr(?)');
        $sth->execute($ord);
        my $result = $sth->fetchall_arrayref->[0][0];
        if (!$enable_utf8) {
            # We asked for UTF-8 octets to arrive in Perl-space.
            # Check this, and convert them to character(s).
            # If we didn't, the next two tests are meaningless, so skip them.
            is(utf8::decode($result), 1, "Got valid UTF-8 for $desc")
                or next;
        }
        is (length $result, 1, "Got 1 character for $desc");
        is (ord $result, $ord, "Got correct character for $desc");
    }
}

cleanup_database($dbh,'test');
$dbh->disconnect();

done_testing();
	Global
`s`	Focus search bar
`?`	Bring up this help dialog
	GitHub
`g` `p`	Go to pull requests
`g` `i`	go to github issues (only if github is preferred repository)
	POD
`g` `a`	Go to author
`g` `c`	Go to changes
`g` `i`	Go to issues
`g` `d`	Go to dist
`g` `r`	Go to repository/SCM
`g` `s`	Go to source
`g` `b`	Go to file browse
	Search terms
module: (e.g. module:Plugin)
distribution: (e.g. distribution:Dancer auth)
author: (e.g. author:SONGMU Redis)
version: (e.g. version:1.00)