The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
#!/home/ben/software/install/bin/perl
use warnings;
use strict;
use Getopt::Long;
use Text::Fuzzy;
use Lingua::EN::PluralToSingular 'to_singular';

# The location of the Unix dictionary.
my $dict = '/usr/share/dict/words';

# Default maximum edit distance. Five is quite a big number for a
# spelling mistake.
my $max = 5;

GetOptions (
    "dict=s" => \$dict,
    "max=i" => \$max,
);

my @words;
my %words;
my $min_length = 4;
read_dictionary ($dict, \@words, \%words);
# Known mistakes, don't repeat.
my %known;
# Spell-check each file on the command line.
for my $file (@ARGV) {
    open my $input, "<", $file or die "Can't open $file: $!";
    while (<$input>) {
        my @line = split /[^a-z']+/i, $_;
        for my $word (@line) {
	    # Remove leading/trailing apostrophes.
	    $word =~ s/^'|'$//g;
	    my $clean_word = to_singular (lc $word);
	    $clean_word =~ s/'s$//;
            if ($words{$clean_word} || $words{$word}) {
                # It is in the dictionary.
                next;
            }
            if (length $word < $min_length) {
                # Very short words are ignored.
                next;
            }
            if ($word eq uc $word) {
                # Acronym like BBC, IRA, etc.
                next;
            }
            if ($known{$clean_word}) {
                # This word was already given to the user.
                next;
            }
	    if ($clean_word =~ /(.*)ed$/ || $clean_word =~ /(.*)ing/) {
		my $stem = $1;
		if ($words{$stem} || $words{"${stem}e"}) {
		    # Past/gerund of $stem/${stem}e
		    next;
		}
		# Test for doubled end consonants,
		# e.g. "submitted"/"submit".
		if ($stem =~ /([bcdfghjklmnpqrstvwxz])\1/) {
		    $stem =~ s/$1$//;
		    if ($words{$stem}) {
			# Past/gerund of $stem/${stem}e
			next;
		    }
		}
	    }
            my $tf = Text::Fuzzy->new ($clean_word, max => $max);
            my $nearest = $tf->nearest (\@words);
	    # We have set a maximum distance to search for, so we need
	    # to check whether $nearest is defined.
            if (defined $nearest) {
                my $correction = $words[$nearest];
                print "$file:$.: '$word' may be '$correction'.\n";
                $known{$clean_word} = $correction;
            }
            else {
                print "$file:$.: $word may be a spelling mistake.\n";
                $known{$clean_word} = 1;
            }
        }
    }
    close $input or die $!;
}

exit;

sub read_dictionary
{
    my ($dict, $words_array, $words_hash) = @_;    
    open my $din, "<", $dict or die "Can't open dictionary $dict: $!";
    my @words;
    while (<$din>) {
        chomp;
	push @words, $_;
    }
    close $din or die $!;
    # Apostrophe words

    my @apo = qw/

		    let's I'll you'll he'll she'll they'll we'll I'm
		    you're he's she's it's we're they're I've they've
		    you've we've one's isn't aren't doesn't don't
		    won't wouldn't I'd you'd he'd we'd they'd
		    shouldn't couldn't didn't can't

		/;

    # Irregular past participles.
    my @pp = qw/became/;

    push @words, @apo, @pp;
    for (@words) {
        push @$words_array, lc $_;
        $words_hash->{$_} = 1;
        $words_hash->{lc $_} = 1;
    }
}