The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
#!/usr/bin/env perl

use v5.12;

sub maxlen   (@    );
sub seqcount ( $ _ );
sub cvmap    ( _   );
sub cvlen  ( _   );
sub as_CV  ( _   );
sub nsyl  ( _   );

use constant MANY => 8;

our $RX_Subs;
our ($V, $C, $VC, $CV);

UNITCHECK { 
    init_subs(); 
}

$| = 1;

use Lingua::EN::Syllable;

@ARGV = "/usr/share/dict/words" if !@ARGV && -t;

main();
exit;

sub main {
    while (<>) {
	#next unless /y/i;
	chomp;
	my $my_count  = nsyl;
	my $his_count = syllable($_);
	next if $my_count == $his_count;
	printf "%d %d %s %s\n", $my_count, $his_count, $_, as_CV;
    } 
}

sub nsyl(_) {
    my $cv = as_CV(shift);
    # $cv =~ s/VV/V/g;
    return $cv =~ y/V//;
} 

sub as_CV(_) {
    local $_ = lc shift;
    s/[gq]ue(?=$|$C)/k/g;
    s/qu/qw/g;
    s/(\p{IsConsonant})\1\Kle$/el/;
    s/[ai]\Kble/bel/;
    s/ble$/bel/;
    s/ive(?=ly|ness)$/iv/;
    s/$C\Kre$/er/;
    # s/$C\Ked$/d/;
    s/ism$/isum/;
    s/$V$C+\Ke$//;
    s/[xtcs]i\K[ao]([ln])/$1/g;
    s/eye/ee/g;
    s/$C\Kle(?:ness)?$/el/;
    s/$V\Ky(?=$V)/C/g;
    s/(?=$C)y(?=[ao])/V/g;
    s/$C\Ky(?=$C)/V/g;
    s/$C\Ky$/V/g;
    s/$C\Ky/C/g;
    s/$V\Ky/V/g;
    s/$C/C/g;
    s/$V/V/g;
    return $_;
} 

sub main2 {

    while (<>) { 
	chomp;

	next if MANY * 2 > length;

	# next unless /y/i;

	# whole strings only
	# next unless cvmap =~ /^(?:CV)+$/ || cvmap =~ /^(?:VC)+$/;

	my @just_VC = seqcount qr/ \p{IsVowel}       \p{IsConsonant}   /x;
	my @just_CV = seqcount qr/ \p{IsConsonant}   \p{IsVowel}       /x;
	my @many_VC = seqcount qr/ \p{IsVowel}     + \p{IsConsonant} + /x;
	my @many_CV = seqcount qr/ \p{IsConsonant} + \p{IsVowel}     + /x;

	my $just_VC = maxlen @just_VC;
	my $just_CV = maxlen @just_CV;
	my $many_VC = maxlen @many_VC;
	my $many_CV = maxlen @many_CV;

	next unless grep { $_ > MANY } $just_VC, $just_CV, $many_VC, $many_CV; 


	say;
	say cvmap;
	say "  VC   = " => fmt(@just_VC) if $just_VC > MANY;
	say "  CV   = " => fmt(@just_CV) if $just_CV > MANY;
	say " (VC)+ = " => fmt(@many_VC) if $many_VC > MANY;
	say " (CV)+ = " => fmt(@many_CV) if $many_CV > MANY;
    }

}

sub maxlen(@) {
    my $max = 0;
    for (@_) {
	$max = cvlen if $max < cvlen;
    } 
    return $max;
} 

sub fmt {
    return sprintf " %s", comma(grep { cvlen > MANY } @_);
} 

sub comma {
    return join(", " => map { $_ . "/" . cvmap . "/" . cvlen } @_);
} 

sub seqcount($_) {
    die "expected two args" unless @_ == 2;
    my ($pat, $str) = @_;
    die "expected pattern in arg1" unless ref($pat) eq ref(qr/./);
    die "expected str in arg2"     if     ref($str);

    $str = wiser($str);

    state $rx_cache = { };
    $rx_cache->{$pat} ||= qr/ (?= ( (?: $pat )+ ) ) /x;

    my @matches = $str =~ /$rx_cache->{$pat}/g;
    return @matches;

} 

sub init_subs {
    our $RX_Subs = qr{
	(?(DEFINE)
	    (?<vowel>
                (?&diphthong) 
	      | \p{IsVowel}
	      | 
		(?<=  \p{IsConsonant} ) 
		(?<!  V               )
		[yY]                 
	    )
	    (?<consonant>
		(?&digraph)
	      | (?= \pL ) ( [^aeiou] ) \g{-1}
	      | \p{IsConsonant} 
	      | (?<= [V\p{IsVowel}] ) [yY] (?= [V\p{IsVowel}] ) 
              | [yY] (?= [V\p{IsVowel}] ) 
	    )

	    (?<digraph>
		rrh
	      | sch
	      | sci (?= ous )
	      |	[cgkprstw] h
	      | ng (?= \p{IsConsonant} | $ )
	      | ^ [gk] n
	      | mb (?!= \p{IsVowel} )
	      | ^ ps
	    )

	    (?<diphthong>
		  eau | oue
		| aa | ae | ai | au | a[yV]
		| ea | ee | ei | e[yV]
		| eo | eu 
		| ie 
		| oa | oe | oi | o[yV]
		| oo | ou | ow 
		| ue
		| ui | u[yV]
	    )
	)
    }x;

    our $V = qr{ $RX_Subs (?&vowel)     }x;
    our $C = qr{ $RX_Subs (?&consonant) }x;

    our $VC = qr{$V$C};
    our $CV = qr{$C$V};
} 

sub wiser {
    local $_ = shift();

    #s/[cgpst]h/h/gi;

    s/ (?<= \p{IsVowel} )     [yY] (?= \p{IsVowel} ) /j/gx;
    s/ (?<= \p{IsConsonant} ) [yY]                   /i/gx;
    s/                        [yY] (?= \p{IsVowel} ) /j/gx;

    return $_;
} 


sub cvmap(_) {
    die "expected one arg" unless @_ == 1;
    my $str = shift();
    die "expected string arg" if ref $str;

    $str = wiser($str);

    $str =~ s{  
                ( \p{IsConsonant} )
              | ( \p{IsVowel}     )
            }{
                length($1) ? "C" : "V";
            }gxe; 
    return $str;
} 

sub cvlen(_) {
    my $map = &cvmap;

    for ($map) {
	s/C{2,}/C/g;
	s/V{2,}/C/g;
    }

    return length($map) / 2;

} 

sub IsVowel {
    return <<'END';
0061
0065
0069
006F
0075
END

} 
sub IsConsonant {
    return <<'END';
0062	0064
0066	0068
006A	006E
0070	0074
0076	0078
007A
END
}