The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
##############################################################
# Text::StripAccents - remove non a-z chars from a string
#  and replace them with their a-z counterparts
##############################################################
#
# Version information
# ===================
#
# 0.1	CC	Apr 05		New module
#
# 0.11	CC	Jun 05		After feedback in cpanrating,
#				documented that the module is
#				latin1 only, and pp with no
#				prereqs
#
##############################################################


package Text::StripAccents;
use strict;
use vars qw (@ISA $VERSION @EXPORT);
use Exporter ();

@ISA = qw(Exporter);
@EXPORT = qw(stripaccents);
$VERSION="0.11";

##############################################################
=pod

=head1 NAME

 Text::StripAccents - removes accented & special characters from strings

=head1 SYNOPSIS

 use Text::StripAccents;

 my $Stripaccent = Text::StripAccents->new();

 my $convertedString = $StripAccents->strip($unconvertedString);

OR

 use Text::StripAccents;

 stripaccents($string); 

=head1 DESCRIPTION

This simple module takes accented characters and replaces them with their anglicised ASCII counterparts, e.g. Ü becomes U. It currently ONLY supports Latin1. If there are any characters I've missed out that you think should be included, please mail me and I'll add them in.

This is a pure perl module with no prerequisites.

=head1 PREREQS

None.

=head1 SEE ALSO

Text::Unaccent is a much more advanced utility to do the same job, but with a C dependency.

=head1 CHANGES

0.11 - bugfix to clarify the documentation, as per Dobrica Pavlinusic's feedback.

=head1 LICENSE

Copyright 2005 by Charles Colbourn, all rights reserved. This program is free software, you can redistribute it and/or modify it under the same terms as Perl itself.

=head1 AUTHOR

Charles Colbourn - charlesc@g0n.net

(Character mapping hash supplied by Nigel Currie).

=cut


##############################################################
# Text::Stripaccent::new - constructor
##############################################################
#
# Takes as param the character set you are using. Latin1 
# support only at present
#
# returns a Stripaccent object
##############################################################

sub new
{
	my $class = shift;
	my $charset = shift;

	my %object;
	return bless \%object,$class;
}

###############################################################
# Text::Stripaccent::strip
###############################################################
#
# Removes all accented chars from a string and replaces them
# with their unaccented equivalents.
# 
# takes a string as a param, returns a converted string
#
###############################################################

sub strip
{

	my $object = shift;
	my $string = shift;


	my %IsoLatin1ToASCIITable = ("A" => "A", "À" => "A", "Á" => "A", "Â" => "A",
"Ã" => "A", "Ä" => "A", "Å" => "A",
			     "B" => "B",
    			     "C" => "C", "Ç" => "C",
    			     "D" => "D",
    			     "E" => "E", "È" => "E", "É" => "E", "Ê" => "E",
"Ë" => "E",
    			     "F" => "F",
    			     "G" => "G",
    			     "H" => "H",
    			     "I" => "I", "Ì" => "I", "Í" => "I", "Î" => "I",
"Ï" => "I",
    			     "J" => "J",
    			     "K" => "K",
    			     "L" => "L",
    			     "M" => "M",
    			     "N" => "N", "Ñ" => "N",
    			     "O" => "O", "Ò" => "O", "Ó" => "O", "Ô" => "O",
"Õ" => "O", "Ö" => "O",
    			     "P" => "P",
    			     "Q" => "Q",
    			     "R" => "R",
    			     "S" => "S",
    			     "T" => "T",
    			     "U" => "U", "Ù" => "U", "Ú" => "U", "Û" => "U",
"Ü" => "U",
    			     "V" => "V",
    			     "W" => "W",
    			     "X" => "X",
    			     "Y" => "Y", "Y" => "Y",
    			     "Z" => "Z",
    			     "a" => "a", "à" => "a", "á" => "a", "â" => "a",
"ã" => "a", "ä" => "a", "å" => "a",
    			     "b" => "b",
    			     "c" => "c", "ç" => "c",
    			     "d" => "d",
    			     "e" => "e", "è" => "e", "é" => "e", "ê" => "e",
"ë" => "e",
    			     "f" => "f",
    			     "g" => "g",
    			     "h" => "h",
    			     "i" => "i", "ì" => "i", "í" => "i", "î" => "i",
"ï" => "i",
    			     "j" => "j",
    			     "k" => "k",
    			     "l" => "l",
    			     "m" => "m",
    			     "n" => "n", "ñ" => "n",
    			     "o" => "o", "ò" => "o", "ó" => "o", "ô" => "o",
"õ" => "o", "ö" => "o",
    			     "p" => "p",
    			     "q" => "q",
    			     "r" => "r",
    			     "s" => "s",
    			     "t" => "t",
    			     "u" => "u", "ù" => "u", "ú" => "u", "û" => "u",
"ü" => "u",
    			     "v" => "v",
    			     "w" => "w",
    			     "x" => "x",
    			     "y" => "y", "y" => "y", "ý" => "y",
    			     "z" => "z",
				"ß"=>"ss");

	my @stringArray = split //,$string;
	foreach (@stringArray)
	{
		if ($IsoLatin1ToASCIITable{$_})
		{
			$_ = $IsoLatin1ToASCIITable{$_};
		}
	}

	my $returnString = join '',@stringArray;

	return $returnString;
}

################################################################
# stripaccent - function to call ::strip in non OO mode
################################################################
sub stripaccents
{
	my $string = shift;
	return __PACKAGE__->strip($string);
}



1;