The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
#!/usr/bin/perl

use strict;
use warnings;
use Encode;
use Search::Tools::UTF8;

# Sample strings for the demo, keyed by a label naming how the bytes are
# encoded.  Each value is spelled out with \x byte escapes, and the label is
# what test() prints as the heading for that sample.
my %text = (

    # U+00E1 (a with acute accent) as a single ISO-8859-1 byte.
    'latin1'           => "\xe1 abc",

    # The same character as its two-byte UTF-8 sequence.
    'utf8'             => "\xc3\xa1 abc",

    # A plain "a" followed by the UTF-8 bytes of U+0301 (combining acute).
    'utf8 w/diacritic' => "\x61\xcc\x81 abc",

    # 7-bit characters only.
    'ascii'            => "abc 123",

);

# Two alternative definitions of a "word"; test() runs both against every
# sample.  test() also prints each pattern's stringified form, so the pattern
# text below is part of the script's output.
# $w_re: a run of Perl's built-in word characters.
my $w_re = qr/(\w+)/;
# $u_re: a run of units, each unit being either a letter (\p{L}) with any
# marks that follow it (\p{M}*), or a hyphen, underscore or digit 0-9.
my $u_re = qr/((\p{L}\p{M}*|[\-\_0-9])+)/;

# Put the :utf8 layer on STDOUT before anything is printed.
binmode STDOUT, ':utf8';

# First pass: report on the samples exactly as they were declared.
test();

my $rule = '=' x 30;
print $rule, 'converting latin1 to utf8', $rule, $/;

# Re-encode, in place, every sample that neither carries Perl's UTF8 flag nor
# passes is_valid_utf8(); such a sample is taken to be ISO-8859-1.
# values() aliases the hash elements, so from_to() updates %text itself.
for my $bytes (values %text)
{
    next if Encode::is_utf8($bytes) || is_valid_utf8($bytes);

    Encode::from_to($bytes, 'iso-8859-1', 'utf8');
}

# Second pass: the same report, now that the conversion has run.
test();

# Print a report for every sample in %text, visiting the labels in sorted
# order.  For each sample it shows what Encode::is_utf8() and is_valid_utf8()
# return before and after the UTF8 flag is forced on, then lists every match
# of $w_re and of $u_re together with the position at which the match ended.
# Takes no arguments; reads the file-level %text, $w_re and $u_re.
sub test
{

    for my $label (sort keys %text)
    {
        # Work on a copy so the stored sample is never modified here.
        my $sample = $text{$label};

        print "$label\n";
        print " Encode says $sample is_utf8 = "
          . Encode::is_utf8($sample) . "\n";
        print " STT says $sample valid_utf8 = "
          . is_valid_utf8($sample) . "\n";

        # force flag: only when the copy already passes is_valid_utf8()
        Encode::_utf8_on($sample) if is_valid_utf8($sample);

        print " now Encode says $sample is_utf8 = "
          . Encode::is_utf8($sample) . "\n";
        print " now STT says $sample valid_utf8 = "
          . is_valid_utf8($sample) . "\n";

        # Walk the sample once per pattern.  The failed match that ends each
        # while loop resets pos(), so the next pattern starts from the front.
        for my $pattern ($w_re, $u_re)
        {
            while ($sample =~ m/$pattern/g)
            {
                my $match_end = pos $sample;
                print "  $pattern $match_end -> $1\n";
            }
        }
    }

    return;
}