The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
#!./perl -w
# Test for malfunctions of utf8 cache

BEGIN {
    chdir 't' if -d 't';
    @INC = '../lib';
    require './test.pl';
}

use strict;

plan(tests => 16);

SKIP: {
skip_without_dynamic_extension("Devel::Peek", 2);

my $out = runperl(stderr => 1,
		  progs => [ split /\n/, <<'EOS' ]);
    require Devel::Peek;
    $a = qq(hello \x{1234});
    for (1..2) {
        bar(substr($a, $_, 1));
    }
    sub bar {
        $_[0] = qq(\x{4321});
        Devel::Peek::Dump($_[0]);
    }
EOS

like($out, qr/\ASV =/, "check we got dump output"); # [perl #121337]

my $utf8magic = qr{ ^ \s+ MAGIC \s = .* \n
                      \s+ MG_VIRTUAL \s = .* \n
                      \s+ MG_TYPE \s = \s PERL_MAGIC_utf8 .* \n
                      \s+ MG_LEN \s = .* \n }xm;

unlike($out, qr{ $utf8magic $utf8magic }x,
       "no duplicate utf8 magic");

} # SKIP

# With bad caching, this code used to go quadratic and take 10s of minutes.
# The 'test' in this case is simply that it doesn't hang.

{
    local ${^UTF8CACHE} = 1; # enable cache, disable debugging
    my $x = "\x{100}" x 1000000;
    while ($x =~ /./g) {
	my $p = pos($x);
    }
    pass("quadratic pos");
}

# Get-magic can reallocate the PV.  Check that the cache is reset in
# such cases.

# Regexp vars
"\x{100}" =~ /(.+)/;
() = substr $1, 0, 1;
"a\x{100}" =~ /(.+)/;
is ord substr($1, 1, 1), 0x100, 'get-magic resets utf8cache on match vars';

# Substr lvalues
my $x = "a\x{100}";
my $l = \substr $x, 0;
() = substr $$l, 1, 1;
substr $x, 0, 1, = "\x{100}";
is ord substr($$l, 1, 1), 0x100, 'get-magic resets utf8cache on LVALUEs';

# defelem magic
my %h;
sub {
  $_[0] = "a\x{100}";
  () = ord substr $_[0], 1, 1;
  $h{k} = "\x{100}"x2;
  is ord substr($_[0], 1, 1), 0x100,
    'get-magic resets uf8cache on defelems';
}->($h{k});


# Overloading can also reallocate the PV.

package UTF8Toggle {
    use overload '""' => 'stringify', fallback => 1;

    sub new {
	my $class = shift;
	my $value = shift;
	my $state = shift||0;
	return bless [$value, $state], $class;
    }

    sub stringify {
	my $self = shift;
	$self->[1] = ! $self->[1];
	if ($self->[1]) {
	    utf8::downgrade($self->[0]);
	} else {
	    utf8::upgrade($self->[0]);
	}
	$self->[0];
    }
}
my $u = UTF8Toggle->new(" \x{c2}7 ");

pos $u = 2;
is pos $u, 2, 'pos on overloaded utf8 toggler';
() = "$u"; # flip flag
pos $u = 2;
is pos $u, 2, 'pos on overloaded utf8 toggler (again)';

() = ord ${\substr $u, 1};
is ord ${\substr($u, 1)}, 0xc2,
    'utf8 cache + overloading does not confuse substr lvalues';
() = "$u"; # flip flag
() = ord substr $u, 1;
is ord substr($u, 1), 0xc2,
    'utf8 cache + overloading does not confuse substr lvalues (again)';

$u = UTF8Toggle->new(" \x{c2}7 ");
() = ord ${\substr $u, 2};
{ no warnings; ${\substr($u, 2, 1)} = 0; }
is $u, " \x{c2}0 ",
    'utf8 cache + overloading does not confuse substr lvalue assignment';
$u = UTF8Toggle->new(" \x{c2}7 ");
() = "$u"; # flip flag
() = ord ${\substr $u, 2};
{ no warnings; ${\substr($u, 2, 1)} = 0; }
is $u, " \x{c2}0 ",
    'utf8 cache + overload does not confuse substr lv assignment (again)';


# Typeglobs and references should not get a cache
use utf8;

#substr
my $globref = \*αabcdefg_::_;
() = substr($$globref, 2, 3);
*_abcdefgα:: = \%αabcdefg_::;
undef %αabcdefg_::;
{ no strict; () = *{"_abcdefgα::_"} }
is substr($$globref, 2, 3), "abc", 'no utf8 pos cache on globs';

my $ref = bless [], "αabcd_";
() = substr($ref, 1, 3);
bless $ref, "_abcdα";
is substr($ref, 1, 3), "abc", 'no utf8 pos cache on references';

#length
$globref = \*αabcdefg_::_;
() = "$$globref";  # turn utf8 flag on
() = length($$globref);
*_abcdefgα:: = \%αabcdefg_::;
undef %αabcdefg_::;
{ no strict; () = *{"_abcdefgα::_"} }
is length($$globref), length("$$globref"), 'no utf8 length cache on globs';

$ref = bless [], "αabcd_";
() = "$ref"; # turn utf8 flag on
() = length $ref;
bless $ref, "α";
is length $ref, length "$ref", 'no utf8 length cache on references';