lib/Text/Phonetic/Phonix.pm

# ============================================================================
package Text::Phonetic::Phonix;
# ============================================================================
use utf8;

use Moose;
extends qw(Text::Phonetic);

__PACKAGE__->meta->make_immutable;

our $VERSION = $Text::Phonetic::VERSION;

our $VOVEL = '[AEIOU]';
our $VOVEL_WITHY = '[AEIOUY]';
our $CONSONANT = '[BCDFGHJLMNPQRSTVXZXY]';

our @VALUES = (
    [qr/[AEIOUHWY]/,0],
    [qr/[BP]/,1],
    [qr/[CGJKQ]/,2],
    [qr/[DT]/,3],
    [qr/L/,4],
    [qr/[MN]/,5],
    [qr/R/,6],
    [qr/[FV]/,7],
    [qr/[SXZ]/,8],
);

our @RULES = (
    [qr/DG/,'G'],
    [qr/C([OAU])/,'K1'],
    [qr/C[YI]/,'SI'],
    [qr/CE/,'SE'],
    [qr/^CL($VOVEL)/,'KL1'],
    [qr/CK/,'K'],
    [qr/[GJ]C$/,'K'],
    [qr/^CH?R($VOVEL)/,'KR1'],
    [qr/^WR/,'R'],
    [qr/NC/,'NK'],
    [qr/CT/,'KT'],
    [qr/PH/,'F'],
    [qr/AA/,'AR'], #neu
    [qr/SCH/,'SH'],
    [qr/BTL/,'TL'],
    [qr/GHT/,'T'],
    [qr/AUGH/,'ARF'],
    [qr/(\w)LJ($VOVEL)/,'1LD2'],
    [qr/LOUGH/,'LOW'],
    [qr/^Q/,'KW'],
    [qr/^KN/,'N'],
    [qr/GN$/,'N'],
    [qr/GHN/,'N'],
    [qr/GNE$/,'N'],
    [qr/GHNE/,'NE'],
    [qr/GNES$/,'NS'],
    [qr/^GN/,'N'],
    [qr/(\w)GN($CONSONANT)/,'1N2'],
    [qr/^PS/,'S'],
    [qr/^PT/,'T'],
    [qr/^CZ/,'C'],
    [qr/($VOVEL)WZ(\w)/,'1Z2'],
    [qr/(\w)CZ(\w)/,'1CH2'],
    [qr/LZ/,'LSH'],
    [qr/RZ/,'RSH'],
    [qr/(\w)Z($VOVEL)/,'1S2'],
    [qr/ZZ/,'TS'],
    [qr/($CONSONANT)Z(\w)/,'1TS2'],
    [qr/HROUGH/,'REW'],
    [qr/OUGH/,'OF'],
    [qr/($VOVEL)Q($VOVEL)/,'1KW2'],
    [qr/($VOVEL)J($VOVEL)/,'1Y2'],
    [qr/^YJ($VOVEL)/,'Y1'],
    [qr/^GH/,'G'],
    [qr/($VOVEL)GH$/,'1E'],
    [qr/^CY/,'S'],
    [qr/NX/,'NKS'],
    [qr/^PF/,'F'],
    [qr/DT$/,'T'],
    [qr/(T|D)L$/,'1IL'],
    [qr/YTH/,'ITH'],
    [qr/^TS?J($VOVEL)/,'CH1'],
    [qr/^TS($VOVEL)/,'T1'],
    [qr/TCH/,'CH'], # old che
    [qr/^($VOVEL)WSK/,'1VSIKE'],
    [qr/^[PM]N($VOVEL)/,'N1'],
    [qr/($VOVEL)STL/,'1SL'],
    [qr/TNT$/,'ENT'],
    [qr/EAUX$/,'OH'],
    [qr/EXCI/,'ECS'],
    [qr/X/,'ECS'],
    [qr/NED$/,'ND'],
    [qr/JR/,'DR'],
    [qr/EE$/,'EA'],
    [qr/ZS/,'S'],
    [qr/($VOVEL)H?R($CONSONANT)/,'1AH2'],
    [qr/($VOVEL)HR$/,'1AH'],
    [qr/RE$/,'AR'],
    [qr/($VOVEL)R$/,'1AH'],
    [qr/LLE/,'LE'],
    [qr/($CONSONANT)LE(S?)$/,'1ILE2'],
    [qr/E$/,''],
    [qr/ES$/,'S'],
    [qr/($VOVEL)SS/,'1AS'],
    [qr/($VOVEL)MB$/,'1M'],
    [qr/MPTS/,'MPS'],
    [qr/MPS/,'MS'],
    [qr/MPT/,'MT'],

);

#sub _do_compare {
#	my $obj = shift;
#	my $result1 = shift;
#	my $result2 = shift;
#
#	# Main values are different
#	return 0 unless ($result1->[0] eq $result2->[0]);
#	
#	# Ending values the same
#	return 75 if ($result1->[1] eq $result2->[1]);
#	
#	# Ending values differ in length, and are same for the shorter
#	my $length1 = length $result1->[1];
#	my $length2 = length $result2->[1];
#	if ($length1 > $length2
#		&& $length1 - $length2 == 1) {
#		return 50 if (substr($result1->[1],0,$length2) eq $result2->[1]);
#	 }elsif ($length2 > $length1
#		&& $length2 - $length1 == 1) {	
#		return 50 if (substr($result2->[1],0,$length1) eq $result1->[1]);
#	}
#	
#	return 25;
#}
#The algorithm always returns either a scalar value or an array reference with 
#two elements. The fist element represents the sound of the name without the 
#ending sound, and the second element represents the ending sound. To get a 
#full representation of the name you need to concat the two elements.
#
#If you want to compare two names the following rules apply:
#
#=over
#
#=item * If the ending sound values of an entered name and a retrieved name are 
#the same, the retrieved name is a LIKELY candidate.
#
#=item * If an entered name has an ending-sound value, and the retrieved name 
#does not, then the retrieved name is a LEAST-LIKELY candidate.
#
#=item * If the two ending-sound values are the same for the length of the 
#shorter, and the difference in length between the two ending-sound is one 
#digit only, then the retrieved name isa LESS-LIKELY candidate.
#
#=item * All other cases result in LEAST-LIKELY candidates.
#
#=back

sub _do_encode {
    my ($self,$string) = @_;
    
    my ($original_string, $first_char);
    $original_string = $string;
    
    # To uppercase and remove other characters
    $string = uc($string);
    $string =~ tr/A-Z//cd;
    
    # RULE 1: Replcace rule
    foreach my $rule (@RULES) {
        my $regexp = $rule->[0];
        my $replace = $rule->[1];
        $string =~ s/$regexp/_replace($replace,$1,$2)/ge;
    }
    
    # RULE 2: Fetch first character
    $first_char = substr($string,0,1,'');
    
    # RULE 3: Exceptions for first character rule
    if (grep { $first_char eq $_ } qw(A E I O U Y)) {
        $first_char = 'v';
        $string =~ s/^$VOVEL_WITHY//;
    } elsif ($first_char eq 'W' || $first_char eq 'H') {
        #$string =~ s/^[WH]//;
    }
    
    # RULE 4
    $string =~ s/ES$/S/;
    # RULE 5
    $string =~ s/($VOVEL_WITHY)$/$1E/;
    # RULE 6
    #$string =~ s/\w$//; # This rule seems kind of strict
    # RULE 7-8
#   if ($string =~ s/($VOVEL_WITHY)([A-Z]+)$/$2/) {
#       # RULE 13
#       $last_string = _transform($2);
#   }
    
    # RULE 9-11
    $string = _transform($string);
    
    # RULE 12
    $string = $first_char.$string;
    
    #$string .= $last_string if (defined $last_string);
    $string .= '0'  x (8-length $string);
    $string = substr($string,0,8);
    
    return $string;
}

sub _transform {
    my $string = shift;
    return unless defined $string;
    
    # RULE 9
    $string =~ s/([AEIOUYHW])//g;
    # RULE 10
    $string =~ s/($CONSONANT+)\1/$1/g;
    # RULE 11
    foreach my $value (@VALUES) {
        my $regexp = $value->[0];
        $string =~ s/$regexp/$value->[1]/g;
    }
    return $string;
}

sub _replace {
    my $replace = shift;
    my $pos1 = shift;
    my $pos2 = shift;
    
    $replace =~ s/1/$pos1/ if (defined $pos1);
    $replace =~ s/2/$pos2/ if (defined $pos2);
    
    return $replace;
}

1;

=encoding utf8

=pod

=head1 NAME

Text::Phonetic::Phonix - Phonix algorithm

=head1 DESCRIPTION

Phonix is an improved version of Soundex, developed by T.N. Gadd. Phonix 
has been incorporated into a number of WAIS implementations, including 
freeWAIS.

There seem to be two variants of the Phonix algorithm. One which also includes
the first letter in the numeric code, and one that doesn't. This module is
using the later variant.

=head1 AUTHOR

    Maroš Kollár
    CPAN ID: MAROS
    maros [at] k-1.com
    http://www.k-1.com

=head1 COPYRIGHT

Text::Phonetic::Phonix is Copyright (c) 2006,2007 Maroš. Kollár.
All rights reserved.

This program is free software; you can redistribute
it and/or modify it under the same terms as Perl itself.

The full text of the license can be found in the
LICENSE file included with this module.

=head1 SEE ALSO


=cut
	Global
`s`	Focus search bar
`?`	Bring up this help dialog
	GitHub
`g` `p`	Go to pull requests
`g` `i`	go to github issues (only if github is preferred repository)
	POD
`g` `a`	Go to author
`g` `c`	Go to changes
`g` `i`	Go to issues
`g` `d`	Go to dist
`g` `r`	Go to repository/SCM
`g` `s`	Go to source
`g` `b`	Go to file browse
	Search terms
module: (e.g. module:Plugin)
distribution: (e.g. distribution:Dancer auth)
author: (e.g. author:SONGMU Redis)
version: (e.g. version:1.00)