package WWW::Correios::CEP;
use strict;
use warnings;
use LWP::UserAgent;
use HTML::TreeBuilder::XPath;
our $VERSION = 1.02;
use Encode;
use utf8;
# Constructor. Takes an optional hashref of settings:
#   user_agent   - User-Agent string sent with the request
#   timeout      - seconds to wait; takes precedence over lwp_options->{timeout}
#   post_url     - URL the query is POSTed to
#   post_content - form-encoded prefix; the CEP digits are appended to it
#   lwp_options  - hashref of extra options handed to LWP::UserAgent->new
# Returns the blessed object. The user agent itself is created lazily on the
# first lookup, so nothing here touches the network.
sub new {
    my ( $class, $params ) = @_;
    $params ||= {};

    # Work on a shallow copy so the caller's lwp_options hash is never
    # modified, and make sure a timeout is always defined: the request code
    # uses it to arm alarm(), and an undefined value would abort every
    # request after one second.
    my %lwp_options = %{ $params->{lwp_options} || {} };
    $lwp_options{timeout} = $params->{timeout}
      if defined $params->{timeout};
    $lwp_options{timeout} = 30
      unless defined $lwp_options{timeout};

    my $this = {
        _user_agent => defined $params->{user_agent}
        ? $params->{user_agent}
        : 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)',
        _lwp_ua      => undef,
        _lwp_options => \%lwp_options,
        _post_url    => defined $params->{post_url}
        ? $params->{post_url}
        : 'http://www.buscacep.correios.com.br/sistemas/buscacep/resultadoBuscaCepEndereco.cfm',
        _post_content => defined $params->{post_content}
        ? $params->{post_content}
        : 'tipoCEP=LOG&semelhante=N&relaxation='
    };

    return bless $this, $class;
}
# Look up a CEP and return the matching address record(s).
# List context: every address hashref found.
# Any other context: only the first hashref, which additionally receives an
# "address_count" key holding the number of addresses found.
sub find {
    my ( $this, $cep ) = @_;

    my @addresses = $this->_extractAddress($cep);
    return @addresses if wantarray;

    my $first = $addresses[0];
    $addresses[0]{address_count} = scalar @addresses;
    return $addresses[0];
}
# Normalise the CEP, POST the query and collect the parsed addresses.
#
# $cep may contain any punctuation; only its digits are used. Fewer than
# eight digits are left-padded with zeros. A CEP that is empty, longer than
# eight significant digits, starts with "00" or repeats one digit eight times
# is rejected without any network access.
#
# Returns the list of address hashrefs in list context, or the first one
# otherwise. Failures are reported through the "status" key of the first
# element; an exception raised during the request (including the alarm
# timeout) is re-thrown to the caller.
sub _extractAddress {
    my ( $this, $cep ) = @_;
    my @result = ();

    # Reduce the input to its digits without warning on undef.
    $cep = '' unless defined $cep;
    $cep =~ s/\D//g;

    # Same normalisation sprintf('%08d') gave, done on the string so that
    # empty input does not warn and long input cannot overflow an integer.
    $cep =~ s/^0+//;
    $cep = ( '0' x ( 8 - length $cep ) ) . $cep if length($cep) < 8;

    if ( length($cep) != 8 || $cep =~ /^00/ || $cep =~ /(\d)\1{7}/ ) {
        $result[0]->{status} = "Error: Invalid CEP number ($cep)";
    }
    else {
        my $configured_timeout = $this->{_lwp_options}{timeout};

        # Build the user agent once and reuse it on later calls.
        if ( !defined $this->{_lwp_ua} ) {
            my $new_ua = LWP::UserAgent->new( %{ $this->{_lwp_options} } );
            $new_ua->agent( $this->{_user_agent} );

            # Only override the agent's own timeout when one was configured.
            $new_ua->timeout($configured_timeout)
              if defined $configured_timeout;
            $this->{_lwp_ua} = $new_ua;
        }
        my $ua = $this->{_lwp_ua};

        # The alarm is a safety net one second beyond the effective timeout.
        my $timeout =
          defined $configured_timeout ? $configured_timeout : $ua->timeout;

        my $req = HTTP::Request->new( POST => $this->{_post_url} );
        $req->content_type('application/x-www-form-urlencoded');
        $req->content( $this->{_post_content} . $cep );

        my $ok = eval {
            local $SIG{ALRM} =
              sub { die "Can't connect to server [alarm timeout]\n" };
            alarm( $timeout + 1 );

            # Pass request to the user agent and get a response back
            my $res = $ua->request($req);

            # Check the outcome of the response
            if ( $res->is_success ) {
                $this->_parseHTML( \@result, $res->content );
            }
            else {
                $result[0]->{status} = "Error: " . $res->status_line;
            }

            # Disarm while our handler is still installed.
            alarm(0);
            1;
        };
        my $error = $@;

        # Also disarm when the block died before reaching its own alarm(0).
        alarm(0);
        die( $error || "Unknown error while querying the server\n" )
          unless $ok;
    }

    return wantarray ? @result : $result[0];
}
# Parse the result page and append one hashref per address row to
# @$address_ref.
#
# $html is the raw response body; it is decoded as ISO-8859-1 here.
# Each data row of the table whose class contains "tmptabela" yields the
# keys street, neighborhood, location, uf, cep and status. status is '' when
# the row carries a CEP and an error message otherwise. When no row is found
# at all, a single element holding only an error status is stored.
# Always returns 1.
sub _parseHTML {
    my ( $this, $address_ref, $html ) = @_;

    $html = decode( 'iso-8859-1', $html );

    # Drop non-breaking spaces, both as entities and as decoded characters;
    # otherwise they survive into the values returned by findvalue.
    $html =~ s/&nbsp;|\x{A0}//g;

    my $tree = HTML::TreeBuilder::XPath->new;
    $tree->parse_content($html);

    my @rows =
      $tree->findnodes('//table[contains(@class,"tmptabela")]/tr[not(th)]');

    for my $row (@rows) {
        my $address = {};
        $address->{street}       = $row->findvalue('./td[1]');
        $address->{neighborhood} = $row->findvalue('./td[2]');
        $address->{cep}          = $row->findvalue('./td[4]');

        # The third cell holds "location/UF".
        ( $address->{location}, $address->{uf} ) =
          split qr{\s*/\s*} => $row->findvalue('./td[3]');

        $address->{status} =
          $address->{cep}
          ? ''
          : 'Error: Address not found, something is wrong...';

        push( @$address_ref, $address );
    }

    # All values above are plain strings by now. Release the tree explicitly,
    # since its parent/child links would otherwise keep it alive.
    $tree->delete;

    $address_ref->[0]->{status} = 'Error: Address not found'
      if ( !@$address_ref );

    return 1;
}
1;
__END__
=encoding utf8
=head1 NAME
WWW::Correios::CEP - Perl extension for extracting addresses from a CEP (zip code) number
=head1 SYNOPSIS
use WWW::Correios::CEP;
my $cepper = WWW::Correios::CEP->new;
my $address = $cepper->find( $cep );
print $address->{street}; # neighborhood, location, uf
=head1 DESCRIPTION
This module fetches CEP information (Brazilian ZIP codes) directly from
the Correios website, Brazil's official post office company.
=head1 METHODS
=head2 new
Creates a new instance of WWW::Correios::CEP. Accepts the following arguments:
=over 4
=item * timeout
when to give up connecting to the Correios website. Defaults to 30 seconds.
=item * user_agent
User Agent string. Defaults to "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)"
=item * post_url
Where to post the query. Defaults to Correios' current location (we hope!)
=item * post_content
What to post in the query. Defaults to Correios' standard options (we hope!)
=item * lwp_options
Extra options to pass to LWP::UserAgent.
=back
=head2 find( $cep )
Receives the CEP string and tries to get address data. Returns a hashref with the following keys:
=over 4
=item * street
=item * neighborhood
=item * location
=item * uf
=item * status
=back
If there is more than one address, it returns a list of hashrefs in list context, or
just the first hashref in scalar context, together with an "C<address_count>" key with
the total returned addresses.
=head1 SEE ALSO
WWW::Correios::SRO
=head1 BUGS AND LIMITATIONS
You may report bugs on GitHub:
L<https://github.com/renatocron/WWW--Correios--CEP/issues>
=head1 SUPPORT
=head2 Perldoc
You can find documentation for this module with the perldoc command.
perldoc WWW\:\:Correios\:\:CEP
=head2 Github
If you want to contribute with the code, you can fork this module on github:
L<https://github.com/renatocron/WWW--Correios--CEP>
=head1 AUTHOR
Renato CRON, E<lt>rentocron@cpan.orgE<gt>
=head1 ACKNOWLEDGEMENTS
2011 - Special thanks to Gabriel "gabiru" Andrade for providing a better
solution for finding addresses!
2014 - Thanks to Garu, for removing legacy test code and improving docs!
=head1 COPYRIGHT AND LICENSE
Copyright (C) 2011-2014 by RenatoCRON
This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself, either Perl version 5.10.1 or,
at your option, any later version of Perl 5 you may have available.
See http://dev.perl.org/licenses/ for more information.