#!/usr/bin/perl -w
package Mail::Miner::Recogniser::Phone;
$Mail::Miner::recognisers{"".__PACKAGE__} =
{
title => "Phone numbers",
help => "Match messages which contain a phone number",
keyword => "phone"
};
my $exchanges =
qr/(?:2(?:0[123456789]|1[023456789]|2[4589]|3[149]|4[0268]|5[012346]|6[024789]|7[06]|8[149])|3(?:0[123456789]|1[023456789]|2[013]|3[04679]|4[057]|5[12]|6[01]|86)|4(?:0[123456789]|1[023456789]|2[35]|3[45]|4[013]|50|69|7[0389]|8[04])|5(?:0[123456789]|1[023456789]|20|30|4[01]|5[19]|6[1237]|7[0134]|8[056])|6(?:0[123456789]|1[023456789]|2[036]|3[016]|4[1679]|5[01]|6[0124]|7[018]|82)|7(?:0[123456789]|1[23456789]|2[047]|3[124]|40|5[478]|6[0357]|7[023458]|8[014567])|8(?:0[0123456789]|1[023456789]|28|3[012]|4[3578]|5[06789]|6[023456789]|7[0678]|88)|9(?:0[123456789]|1[023456789]|2[058]|3[1679]|4[0179]|5[246]|7[012389]|8[059]))/ox;
sub process {
my ($class, %hash) = @_;
my $body = $hash{getbody}->();
my $usphone_prefix = qr/\($exchanges\)|$exchanges/;
my $usphone_suffix = qr/\s+\d{3}[-\s]+\d{4}/;
my $usphone = qr/$usphone_prefix$usphone_suffix/;
my $extension_suffix = qr/\s*(?:(?:ext|x)[\s.:]+\d+)?/i;
$body =~ s/IS[SB]N\D+\d+x?//i; # Bastards.
my %found = ();
my $phonestuff = qr/\+?[\d\s\(\)-]+\d$extension_suffix/;
# "Maximal munch"
my $phone_words = qr/(?:t|p|Tel|phone|mobile|mob|f|fax|m|telephone)/i;
$found{$1} = "sure" while $body =~ s/\b$phone_words[:.]*
\s*($phonestuff)//x;
# Magic words
my $magic = qr/number|phone|call|cell|mobile|fax|contact|ring/i;
$found{$1} = "very likely" while $body =~ s/\b$magic[^+\(\d\)]+($phonestuff)//;
# Oftel recommended presentations with brackets
my $oftel_b = qr/
(\(0\d{3}\) \s+ \d{3} \s+ \d{4}|
\(0\d{2}\) \s+ \d{4} \s+ \d{4}|
\(0\d{4}\) \s+ \d{3} \s+ \d{3})/x;
$found{$1} = "sure" while $body =~ s/(?:\b|^)($oftel_b$extension_suffix)(\b|$)//;
# Oftel recommended presentations:
my $oftel = qr/(01\d{2} \s+ \d{3} \s+ \d{4}|
01\d{3} \s+ \d{3} \s+ \d{3}|
02\d \s+ \d{4} \s+ \d{4}|
0\d{4} \s+ \d{3} \s+ \d{3})/x;
$found{$1} = "UK" while $body =~ s/(?:\b|^)($oftel$extension_suffix)(\b|$)//;
# Lax Oftel:
my $oftel_l = qr/(01\d{2} \s* \d{7}|
01\d{3} \s* \d{6}|
02\d \s* \d{8}|
\(0\d{3}\) \s* \d{3} \s+ \d{4}|
\(0\d{2}\) \s* \d{4} \s+ \d{4}|
\(0\d{4}\) \s* \d{3} \s+ \d{3})/x;
$found{$1} = "UK" while $body =~ s/(?:\b|^)($oftel_l$extension_suffix)(\b|$)//;
my $ukphone_int = qr/(\+44\s*|44\s+)\(?[\(\)\d]{2,8}\)?/;
my $ukphone_code = qr/\(0\d{2,6}\)|0\d{2,6}\s+/;
my $ukphone_suffix = qr/\s*[\d -]{6,15}/;
my $ukphone = qr/($ukphone_int|$ukphone_code)$ukphone_suffix/;
$found{$1} = "UK" while $body =~ s/(?:\b|^)($ukphone$extension_suffix)(\b|$)//;
$found{$1} = "US" while $body =~ /(?:\b|^)($usphone)(\b|$)/g;
return map { s/^\s+//; s/\s+$//; $_ } grep $_, keys %found;
}
1;