The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
#!/usr/bin/env perl

use strict;
use warnings;

use Test::More;

use Inline C => <<END_C;
/*
 * isUTF8 - check whether a byte buffer is well-formed UTF-8.
 *
 * s    buffer to inspect; it is read as raw octets and may contain
 *      embedded zero bytes.
 * len  number of octets in s to examine.
 *
 * Returns 1 when every octet of s[0..len-1] belongs to a well-formed
 * UTF-8 sequence, 0 otherwise. An empty buffer (len <= 0) counts as
 * well-formed. Overlong encodings, the surrogate range U+D800..U+DFFF,
 * code points above U+10FFFF, stray continuation bytes and truncated
 * sequences are all rejected.
 *
 * Accepted byte ranges:
 *
 *   byte 1    byte 2    byte 3    byte 4
 *   00..7F
 *   C2..DF    80..BF
 *   E0        A0..BF    80..BF
 *   E1..EC    80..BF    80..BF
 *   ED        80..9F    80..BF
 *   EE..EF    80..BF    80..BF
 *   F0        90..BF    80..BF    80..BF
 *   F1..F3    80..BF    80..BF    80..BF
 *   F4        80..8F    80..BF    80..BF
 */
int isUTF8(const char *s, int len) {
  /* Read the input as unsigned octets so that bytes >= 0x80 compare
     correctly even where plain char is signed. */
  const unsigned char *p = (const unsigned char *) s;
  int i = 0;

  if (len <= 0) {
    return 1;
  }
  if (!s) {
    return 0;
  }

  while (i < len) {
    unsigned int lead = p[i];
    unsigned int lo = 0x80;   /* lowest value allowed for byte 2  */
    unsigned int hi = 0xbf;   /* highest value allowed for byte 2 */
    int extra;                /* continuation bytes after the lead */
    int k;

    if (lead < 0x80) {                          /* ASCII */
      i++;
      continue;
    }
    else if (lead >= 0xc2 && lead <= 0xdf) {    /* 2-byte sequence */
      extra = 1;
    }
    else if (lead >= 0xe0 && lead <= 0xef) {    /* 3-byte sequence */
      extra = 2;
      if (lead == 0xe0) {
        lo = 0xa0;                              /* excludes overlong forms */
      }
      else if (lead == 0xed) {
        hi = 0x9f;                              /* excludes surrogates */
      }
    }
    else if (lead >= 0xf0 && lead <= 0xf4) {    /* 4-byte sequence */
      extra = 3;
      if (lead == 0xf0) {
        lo = 0x90;                              /* excludes overlong forms */
      }
      else if (lead == 0xf4) {
        hi = 0x8f;                              /* excludes > U+10FFFF */
      }
    }
    else {
      /* Stray continuation byte (80..BF), or a lead byte that can never
         appear in well-formed UTF-8 (C0, C1, F5..FF). */
      return 0;
    }

    /* The whole sequence must fit inside the buffer. */
    if (len - i <= extra) {
      return 0;
    }

    /* Byte 2 carries the lead-specific range restriction. */
    if (p[i + 1] < lo || p[i + 1] > hi) {
      return 0;
    }

    /* Any remaining bytes must be plain continuation bytes (80..BF). */
    for (k = 2; k <= extra; k++) {
      if ((p[i + k] & 0xc0) != 0x80) {
        return 0;
      }
    }

    i += extra + 1;
  }

  return 1;
}
END_C

# Byte sequences to check, mapped to the verdict isUTF8() is expected to
# return for them: 1 = well-formed UTF-8, 0 = not well-formed.
my %valid = (
    "abc123"           => 1,    # plain ASCII
    "\xc0\x81"         => 0,    # C0 is never a legal lead byte
    "\xc1\xa0"         => 0,    # C1 is never a legal lead byte
    "\xc2\x81"         => 1,    # lowest 2-byte lead
    "\xdf\x80"         => 1,    # highest 2-byte lead
    "\xdf\xc0"         => 0,    # second byte is not a continuation byte
    "\xe0\x80"         => 0,    # 3-byte lead with only one byte following
    "\xe0\x81\x80"     => 0,    # E0 followed by a byte below A0
    "\xe0\xa0\x80"     => 1,    # E0 followed by A0
    "\xed\xa0\x80"     => 0,    # ED followed by a byte above 9F
    "\xee\x81\x81"     => 1,    # ordinary 3-byte sequence
    "\xe9a"            => 0,    # 3-byte lead followed by ASCII
    "\xf0\x90\xbe\xbf" => 1,    # 4-byte sequence with lead F0
    "\xf2\x79\x80\x80" => 0,    # F2 followed by a non-continuation byte
    "\xf4\x8f\xbf\x80" => 1,    # F4 followed by 8F
);

# Feed each sequence to isUTF8() in sorted key order and compare the
# result with the expected verdict. The test name shows the octets in
# hex together with the expectation.
foreach my $sequence ( sort keys %valid ) {
    my @octets   = unpack 'C*', $sequence;
    my $bytes    = pack 'C*', @octets;
    my $expected = $valid{$sequence};

    my $hex = join ' ', map { sprintf '%x', $_ } @octets;

    my $vStr;
    if ($expected) {
        $vStr = 'valid';
    }
    else {
        $vStr = 'invalid';
    }

    my $isUTF8 = isUTF8( $bytes, length($bytes) );
    is( $isUTF8, $expected, "utf8-test: $hex ($vStr)" );
}

done_testing();