package WWW::Patent::Page; #modeled vaguely on LWP::UserAgent
use strict;
use warnings;
use diagnostics;
use Carp qw(carp cluck confess);
use English qw( -no_match_vars );
#use HTML::Display; ## comment out after completion; used for testing. see sub request
#my $browser = HTML::Display->new(class => 'HTML::Display::Win32::IE',); #comment out after completion; used for testing. see sub request
# use criticism 'brutal'; # handled in tests; author only
# $ prove -l lib --verbose t/999_critic.t # example of using prove
require LWP::UserAgent;
use WWW::Patent::Page::Response;
#use HTTP::Cache::Transparent;
#HTTP::Cache::Transparent::init( {
# BasePath => '/tmp/cache' ,
# NoUpdate => 60 * 60 *24 * 7 * 52 # seconds 2 minutes 2 hours 2 days 2 weeks 2 years = 1 year
#});
# Predeclare these subs (use subs) so their names are known to the parser
# before the definitions further down the file.
use subs qw( new country_known get_page _load_modules _agent _load_country_known );
# %METHODS maps a method name to the code reference supplied by an office
# module (filled in by _load_modules); %_country_known maps a two-letter code
# to its country/entity name (filled from _load_country_known below).
my (%METHODS, %_country_known);
# %MODULES records which office modules _load_modules has already visited;
# the other variables are the user-adjustable settings assigned below.
my (%MODULES, $default_country, $default_office, @modules_to_load);
use version; our $VERSION = qv('0.109.0'); # January, 2012
use base qw( LWP::UserAgent );
%_country_known = _load_country_known();
# user set variables:
# Office modules that new() hands to _load_modules; each name is appended to
# the class name to form the module that gets loaded.
@modules_to_load = (
'USPTO',
#'MICROPATENT',
#'JPO_IPDI' ,
# 'ESPACE_EP', # 'ESPACE_EP' bad August 2009 due to captcha use
# , 'OPEN_PATENT_SERVICES' # Watch this space!
);
# if you write your own module; please send to wanda_b_Anon@yahoo.com for distribution
# Country used by new() and parse_doc_id when none is supplied.
$default_country = 'US';
# $default_office = 'ESPACE_EP'; # they support many countries/entities
# Office used by new() unless the caller passes 'office'.
$default_office = 'USPTO'; # 'ESPACE_EP' bad August 2009 due to captcha use
# Constructor: returns a WWW::Patent::Page object (an LWP::UserAgent subclass).
#
# Call forms:
#   WWW::Patent::Page->new();                             # defaults only
#   WWW::Patent::Page->new($doc_id);                      # document id only
#   WWW::Patent::Page->new($doc_id, key => value, ...);   # id plus settings
#   WWW::Patent::Page->new(key => value, ...);            # settings only
#
# Keys listed in %default_parameter below are patent settings and are stored
# under $self->{'patent'}; every other key is handed to LWP::UserAgent->new.
# If both 'country' and 'number' are passed, the document id becomes their
# concatenation. Whenever a document id is present it is parsed with
# parse_doc_id before the object is returned.
sub new {
    my ($class, $doc_id, %passed_parm);

    # @_ includes the class itself: an odd count is "class + key/value pairs",
    # an even count is "class + doc_id + key/value pairs".
    if (@_ % 2) {
        ($class, %passed_parm) = @_;
    }
    else {
        ($class, $doc_id, %passed_parm) = @_;
    }

    # Settings destined for LWP::UserAgent->new.
    my %parent_parms = (
        agent => "WWW::Patent::Page/$VERSION",
    );

    # The only patent-specific parameters exposed to the user.
    my %default_parameter = (
        'is_success'          => undef,
        'message'             => undef,
        'office'              => $default_office,     # USPTO is provided
        'office_username'     => undef,               # e.g. MicroPatent account
        'office_password'     => undef,               # e.g. MicroPatent password
        'session_token'       => undef,               # e.g. session number in Micropatent, from username and password
        'country'             => $default_country,    # US is provided
        'doc_id'              => undef,               # US_6,123,456 as entered
        'doc_id_standardized' => undef,               # US6123456 sparse
        'doc_id_commified'    => undef,               # US6,123,456
        'doc_type'            => undef,               # PP, RE, D, etc
        'format'              => 'pdf',               # pdf html
        'page'                => undef,
        'comment'             => undef,
        'kind'                => undef,               # A B etc (not yet used)
        'number'              => undef,               # 6123456
        'tempdir'             => undef,               # directory for temp files USPTO_pdf
    );

    # A positional document id takes precedence over a doc_id => ... pair.
    if ($doc_id) {
        $passed_parm{'doc_id'} = $doc_id;
    }

    # Split the passed settings into patent parameters and LWP parameters.
    for my $key (keys %passed_parm) {
        if (exists $default_parameter{$key}) {
            $default_parameter{$key} = $passed_parm{$key};
        }
        else {
            $parent_parms{$key} = $passed_parm{$key};
        }
    }

    my $self = $class->SUPER::new(%parent_parms);
    bless $self, ref $class || $class;

    # Store the patent parameters now that the object exists.
    for my $parm (keys %default_parameter) {
        $self->{'patent'}->{$parm} = $default_parameter{$parm};
    }

    # Apply this module's LWP defaults only where the caller did not supply a
    # value; the caller's cookie_jar/timeout were already set by SUPER::new.
    if (!exists $passed_parm{'cookie_jar'}) {
        $self->cookie_jar({});
    }
    $self->env_proxy();    # proxy settings from the environment, via LWP::UserAgent
    if (!exists $passed_parm{'timeout'}) {
        $self->timeout(240);    # 240 seconds rather than LWP's 180
    }
    push @{ $self->requests_redirectable }, 'POST';    # follow redirects of POST requests too

    # agent() is a plain accessor, not an lvalue sub: pass the value as an
    # argument rather than assigning to the call.
    if (!defined $self->agent) {
        $self->agent($class->_agent);
    }

    # List your custom modules in @modules_to_load, and put them into the
    # folder that holds the others, e.g. USPTO.pm
    $self->_load_modules(@modules_to_load);

    if (defined $passed_parm{'country'} and defined $passed_parm{'number'}) {
        $self->{'patent'}->{'doc_id'} = $passed_parm{'country'} . $passed_parm{'number'};
    }

    # If called with a document id, parse it.
    if ($self->{'patent'}->{'doc_id'}) {
        $self->parse_doc_id();
    }
    return $self;
}
# Look up a two-letter country/entity code in %_country_known.
# Returns the entity name when the code is present, otherwise undef.
sub country_known {
    my ($self, $code) = @_;
    return (exists $_country_known{$code}) ? $_country_known{$code} : undef;
}
# Parse a human-entered document identifier such as 'US_6,123,456_A1_-note'
# into country, document type, number, kind code and comment.
#
# Arguments: $self, and optionally the identifier; when no identifier is
# passed, $self->{'patent'}->{'doc_id'} is used.
#
# On success the fields 'country', 'doc_type', 'number', 'kind', 'comment'
# and 'doc_id_standardized' under $self->{'patent'} are set, and a summary
# string of what was found is returned.
#
# On failure 'is_success' is set to undef, 'message' describes the problem
# and undef is returned. (The failure returns deliberately yield a single
# undef even in list context, as they always have, so existing callers see
# the same value.) With no identifier at all, a warning is issued and the
# sub returns empty.
sub parse_doc_id {
    my ($self, $id) = @_;
    my $patent = $self->{'patent'};
    $patent->{'message'} = q{};
    if (!$id) {
        $id = $patent->{'doc_id'}
            or (carp 'No document id to parse' and return);
    }

    # Document type codes, upper case; the same set as the "type" alternation
    # in the pattern below.
    my $type_re = qr/\A(?:D|PP|RE|T|H|RX|AI|S|M)\z/;

    # Record a failure: clear $field, flag the parse as unsuccessful, keep the
    # message for the caller, and return undef.
    my $fail = sub {
        my ($field, $message) = @_;
        if (defined $field) {
            $patent->{$field} = undef;
        }
        $patent->{'is_success'} = undef;
        $patent->{'message'}    = $message;
        return undef;
    };

    my ($country, $type, $number, $kind, $comment) = $id =~ m{^ # anchor to beginning of string
        [, _\.\t-]* #separator(s) (optional)
        (\D\D){0,1} # country (optional) (well, sometimes the type, if country not supplied because known by other means)
        [, _\.\t-]* #separator(s) (optional)
        (D|PP|RE|T|H|RX|AI|d|pp|re|t|h|rx|ai|S|M|s|m){0,1} # type, if accompanied by country (use below also!)
        [, _\.\t-]* #separator(s) (optional)
        ([, _\d-]+) # "number" REQUIRED to have digits - with interspersed separator(s) (optional)
        [, _\.\t-]* #separator(s) (optional)
        (
        A$|A[, _\.\t-]+|B$|B[, _\.\t-]+|D$|D[, _\.\t-]+|E$|E[, _\.\t-]+|H$|H[, _\.\t-]+|
        L$|L[, _\.\t-]+|M$|M[, _\.\t-]+|O$|O[, _\.\t-]+|P$|P[, _\.\t-]+|S$|S[, _\.\t-]+|
        T$|T[, _\.\t-]+|U$|U[, _\.\t-]+|W$|W[, _\.\t-]+|X$|X[, _\.\t-]+|Y$|Y[, _\.\t-]+|
        Z$|Z[, _\.\t-]+|
        A0|A1|A2|A3|A4|A5|A6|A7|A8|A9|B1|B2|B3|B4|B5|B6|B8|B9|C$|C0|C1|C2|C3|C4|C5|
        C8|C[, _\.\t-]+|F1|F2|H1|H2|P1|P2|P3|P4|P9|T1|T2|T3|T4|T5|T9|U0|U1|U2|U3|U4|
        U8|W1|W2|X0|X1|X2|Y1|Y2|Y3|Y4|Y5|Y6|Y8|
        a$|a[, _\.\t-]+|b$|b[, _\.\t-]+|d$|d[, _\.\t-]+|e$|e[, _\.\t-]+|h$|h[, _\.\t-]+|
        l$|l[, _\.\t-]+|m$|m[, _\.\t-]+|o$|o[, _\.\t-]+|p$|p[, _\.\t-]+|s$|s[, _\.\t-]+|
        t$|t[, _\.\t-]+|u$|u[, _\.\t-]+|w$|w[, _\.\t-]+|x$|x[, _\.\t-]+|y$|y[, _\.\t-]+|
        z$|z[, _\.\t-]+|
        a0|a1|a2|a3|a4|a5|a6|a7|a8|a9|b1|b2|b3|b4|b5|b6|b8|b9|c$|c0|c1|c2|c3|c4|c5|
        c8|c[, _\.\t-]+|f1|f2|h1|h2|p1|p2|p3|p4|p9|t1|t2|t3|t4|t5|t9|u0|u1|u2|u3|u4|
        u8|w1|w2|x0|x1|x2|y1|y2|y3|y4|y5|y6|y8
        ){0,1}
        # kind code (eats up separator required before comment)
        (.*) # comment (optional, if used, required to be preceded by at least one separator)
        }mx;

    # The number group is the only mandatory capture, so it is defined exactly
    # when the pattern matched.
    if (!defined $number) {
        carp "document id '$id'\nnot parsed.";
        return $fail->(undef, "document id '$id' not parsed.");
    }

    $country = $country ? uc $country : $default_country;
    $type    = $type    ? uc $type    : undef;    # type codes are required to be upper case

    # Two leading letters that are not a known country may really be a type
    # given without a country, e.g. 'RE35,312' or 'D 339,456'. Accept them
    # only when, separators removed, they are exactly one of the type codes.
    if (!defined $type && !$_country_known{$country}) {
        (my $candidate = $country) =~ s/[, _.\t-]//g;
        if ($candidate =~ $type_re) {
            $type    = $candidate;
            $country = $default_country;
        }
        else {
            return $fail->('country', "unrecognized _country or type: country: from '$id'");
        }
    }

    # Final validation of country and type. ('$type !~' is used on purpose:
    # '!$type =~ ...' would negate $type before matching and never fail.)
    if (!exists $_country_known{$country} || ($type && $type !~ $type_re)) {
        my $shown_type = defined $type ? $type : q{};
        return $fail->('country',
            "unrecognized _country or type: country: '$country' type: '$shown_type' from '$id'");
    }

    # Reduce the number to its digits; a "number" made only of separators is
    # not a document number.
    $number =~ s/[, _\-]//g;
    if ($number !~ /\d/) {
        return $fail->('number', "no number found in document id '$id'");
    }

    if ($kind) {
        $kind = uc $kind;
        $kind =~ s/[, _\- ]//mxg;
    }
    if ($comment) {
        $comment =~ s/^[,_\- ]*//mxg;
        $comment =~ s/[,_\- ]*$//mxg;
    }

    $patent->{'country'}  = $country;
    $patent->{'doc_type'} = $type;
    $patent->{'number'}   = $number;
    $patent->{'kind'}     = $kind;
    $patent->{'comment'}  = $comment;

    ## Japanese number fiddling - later, this kind of thing may go into JPO_IPDI_parse_doc_id
    if ($patent->{'country'} eq 'JP') {
        my $jp_type = defined $patent->{'doc_type'} ? uc $patent->{'doc_type'} : q{};
        if ($jp_type eq 'H' or $jp_type eq 'S' or $jp_type eq 'T' or $jp_type eq 'M') {

            # Move the two-digit year from the number into the type, e.g.
            # H + 09123456 becomes H09- + 123456. (Heisei < 10 must have 0 prefix.)
            my $year = substr $patent->{'number'}, 0, 2;
            $patent->{'number'} =~ s{^\d\d}{}xm;
            $patent->{'doc_type'} .= "$year-";
        }
        elsif ( length($patent->{'number'}) > 7    # checked first so substr stays inside the string
            and substr($patent->{'number'}, 3, 1) ne q(-)
            and substr($patent->{'number'}, 0, 4) > 1992
            and substr($patent->{'number'}, 0, 4) <= ((localtime(time))[5] + 1900))
        {
            # Number starting with a four-digit year between 1993 and the
            # current year: separate the year with a dash.
            $patent->{'number'} =~ s{^(\d\d\d\d)}{$1-}xm;
        }
    }

    # Summary of what was found: [field, label when defined, label when undefined].
    my $found = q{};
    for my $part (
        ['country',  'country', 'country'],
        ['doc_type', 'type',    'doc_type'],
        ['number',   'number',  'number'],
        ['kind',     'kind',    'kind'],
        ['comment',  'comment', 'comment'],
        )
    {
        my ($field, $label, $missing_label) = @{$part};
        if (defined $patent->{$field}) {
            $found .= " $label:$patent->{$field} ";
        }
        else {
            $found .= qq{ $missing_label: "" };
        }
    }

    # Standardized id: country, type (if any), number, kind (if any).
    $patent->{'doc_id_standardized'} = join q{},
        $patent->{'country'},
        ($patent->{'doc_type'} ? $patent->{'doc_type'} : ()),
        $patent->{'number'},
        ($patent->{'kind'} ? $patent->{'kind'} : ());

    return $found;
}
# Fetch a document/page through the office- and format-specific method
# registered in %METHODS under the name "<office>_<format>".
#
# Arguments: $self, optionally a document id, then key => value settings.
# (An odd number of arguments after $self means the first one is the id.)
# A key that already exists on the object itself or under $self->{'patent'}
# is updated; other keys are ignored.
#
# Returns a WWW::Patent::Page::Response. When a sanity check fails, the
# response has 'is_success' undef and 'message' explaining why. Otherwise the
# return value is whatever the office method returns.
sub get_page {
    my ($self, @args) = @_;
    if (@args % 2) {
        $self->{'patent'}->{'doc_id'} = shift @args;
    }
    my %passed_parm = @args;

    # Keep the patent-specific parms before USING the object.
    while (my ($key, $value) = each %passed_parm) {
        if (exists $self->{$key}) {
            $self->{$key} = $value;
        }
        elsif (exists $self->{'patent'}->{$key}) {
            $self->{'patent'}->{$key} = $value;
        }
    }

    # Remember whether parsing worked: after a failed parse the fields may
    # still hold the values of a previously parsed document.
    my $parsed_ok = 1;
    if ($self->{'patent'}->{'doc_id'}) {
        $parsed_ok = $self->parse_doc_id() ? 1 : 0;
    }

    # Make the response here so the sanity tests can report through it.
    my $response = WWW::Patent::Page::Response->new(%{ $self->{'patent'} });
    my $fail = sub {
        my ($message) = @_;
        $response->set_parameter('is_success', undef);
        $response->set_parameter('message',    $message);
        return $response;
    };

    my $country = $response->get_parameter('country');
    if (!$country) {
        return $fail->('no country defined');
    }
    if (!$_country_known{$country}) {
        return $fail->(q{country '} . $country . q{' not recognized});
    }
    if (!$response->get_parameter('number')) {
        return $fail->('no patent number defined');
    }
    if (!$response->get_parameter('office')) {
        return $fail->('no office defined');
    }
    if (!$response->get_parameter('format')) {
        return $fail->('no format defined');
    }

    # Do not fetch with stale fields when the document id could not be parsed.
    if (!$parsed_ok) {
        return $fail->($self->{'patent'}->{'message'}
                || "document id '$self->{'patent'}->{'doc_id'}' not parsed.");
    }

    my $provide_doc = "$self->{'patent'}->{'office'}" . '_' . "$self->{'patent'}->{'format'}";
    my $function_reference = $METHODS{$provide_doc};
    if (!$function_reference) {
        return $fail->("method '$provide_doc' not provided");
    }

    # Pass our object and the prepared response to the specific fetcher.
    my $result = $function_reference->($self, $response);
    if (!$result) {
        carp "No response for method '$provide_doc'";
    }
    return $result;
}
# Return the terms-and-conditions text for an office.
# Pass $self, then optionally the office whose terms you need; without it the
# office set in $self is used. The text comes from the "<office>_terms" entry
# in %METHODS; when there is none, a warning is issued and a generic notice
# is returned.
sub terms {
    my ($self, @rest) = @_;
    my $office = (@rest % 2) ? shift @rest : $self->{'patent'}->{'office'};
    my $terms  = $office . '_terms';

    if (exists $METHODS{$terms}) {
        return $METHODS{$terms}->($self);
    }

    carp "Undefined method $office" . '_terms in Patent:Document::Retrieve';
    return ( 'WWW::Patent::Page uses publicly available information that may be subject to copyright.' . "\n"
            . 'The user is responsible for observing intellectual property rights. ');
}
# Intercept the LWP request to add a retry.
# The request is sent through LWP::UserAgent::request. While the response is
# unsuccessful, up to two rounds are made; in each round a 500 or a 503 status
# leads to a five second pause, a resend and a warning with a stack trace.
# A response that is still unsuccessful afterwards is fatal (confess).
sub request {
    my ($self, @request_args) = @_;
    my $response = LWP::UserAgent::request($self, @request_args);

    # TODO: make the number of rounds assignable at start-up, configurable
    for my $round (1 .. 2) {
        last if $response->is_success;

        if ($response->code == 500) {    # second chance
            sleep 5;
            $response = LWP::UserAgent::request($self, @request_args);
            cluck 'server responded with code 500, internal server error, trying again for you in case they got their act together in the last few seconds';
        }
        if ($response->code == 503) {    # second chance
            sleep 5;
            $response = LWP::UserAgent::request($self, @request_args);
            cluck 'server responded with code 503, Service Unavailable, trying again for you in case it became available in the last few seconds';
        }
    }

    if (!$response->is_success) {
        my @report = (
            'original url = "'.$request_args[0]->as_string().'", request that caused this response = "' . $response->request()->as_string.'", response code = "',
            $response->code(),
            '" = "'.$response->message.'", response as string = "'.$response->as_string. q("),
        );
        confess @report;
    }
    return $response;
}
# Log in to the office set in $self through its "<office>_login" method.
# Pass $self, then optionally a username and a password; values not passed
# (or false) fall back to 'office_username' / 'office_password' stored under
# $self->{'patent'}. Returns whatever the office's login method returns.
# Dies with a stack trace when no login method is registered for the office.
sub login {
    my $self     = shift;
    my $username = shift || $self->{'patent'}->{'office_username'};
    my $password = shift || $self->{'patent'}->{'office_password'};
    my $login    = $self->{'patent'}->{'office'} . '_login';

    # Fail with a clear message instead of calling an undefined code reference.
    my $function_reference = $METHODS{$login}
        or confess "No login method '$login' is provided by the loaded office modules";
    return $function_reference->($self, $username, $password);
}
# Default user agent string: the package name, a slash, and the package version.
sub _agent {
    my $agent_string = "WWW::Patent::Page/$WWW::Patent::Page::VERSION";
    return $agent_string;
}
# Load the office modules and register the methods they provide.
# Pass the class (or an object) and the list of module names that will be
# available; add more for custom modules for other patent offices. Each name
# is appended to the class name, e.g. WWW::Patent::Page::USPTO.
# A module that cannot be loaded is reported with a warning and skipped.
# Each module is visited only once per process. Returns nothing.
sub _load_modules {
    my ($class, @modules) = @_;
    my $baseclass = ref $class || $class;

    MODULE:
    foreach my $module (@modules) {
        my $modpath = "${baseclass}::${module}";
        next MODULE if defined $MODULES{$modpath};    # already visited

        # require needs a file name when the module name is held in a variable.
        (my $file = "$modpath.pm") =~ s{::}{/}g;
        my $loaded = eval { require $file; $modpath->import(); 1 };
        if (!$loaded) {
            carp $EVAL_ERROR;
            $MODULES{$modpath} = 0;    # visited, but nothing to register
            next MODULE;
        }
        $MODULES{$modpath} = 1;

        # The module's methods() returns method-name => function-reference
        # pairs; enter them into %METHODS.
        my %methodhash = $modpath->methods;
        @METHODS{ keys %methodhash } = values %methodhash;
    }
    return;
}
# Return the list of code => name pairs for the known two-letter
# country/entity codes; it is used above to fill %_country_known.
sub _load_country_known {
    # from HANDBOOK ON INDUSTRIAL PROPERTY INFORMATION AND DOCUMENTATION
    # Standard ST.3
    # see http://www.wipo.int/scit/en/standards/pdf/03-03-01.pdf
    # these codes reflect the versions used since 1978
    # e.g. Algeria used to be AG, which is now Antigua, and Algeria is DZ.
    # where no conflicts exist, antiquated codes are included
    # such as CS for Czechoslovakia along with CZ for Czech Republic
    # and SU for Soviet Union.
    # Conflicts exist for International Patent Institute IB
    # and Democratic Yemen SY
    # see below for list by country, alphabetical
    return (
        'AE' => 'United Arab Emirates',
        'AF' => 'Afghanistan',
        'AG' => 'Antigua and Barbuda',
        'AI' => 'Anguilla',
        'AL' => 'Albania',
        'AM' => 'Armenia',
        'AN' => 'Netherlands Antilles',
        'AO' => 'Angola',
        'AP' => 'African Regional Intellectual Property Organization',
        'AR' => 'Argentina',
        'AT' => 'Austria',
        'AU' => 'Australia',
        'AW' => 'Aruba',
        'AZ' => 'Azerbaijan',
        'BA' => 'Bosnia and Herzegovina',
        'BB' => 'Barbados',
        'BD' => 'Bangladesh',
        'BE' => 'Belgium',
        'BF' => 'Burkina Faso',
        'BG' => 'Bulgaria',
        'BH' => 'Bahrain',
        'BI' => 'Burundi',
        'BJ' => 'Benin',
        'BM' => 'Bermuda',
        'BN' => 'Brunei Darussalam',
        'BO' => 'Bolivia',
        'BR' => 'Brazil',
        'BS' => 'Bahamas',
        'BT' => 'Bhutan',
        'BV' => 'Bouvet Island',
        'BW' => 'Botswana',
        'BX' => 'Benelux Trademark Office',
        'BY' => 'Belarus',
        'BZ' => 'Belize',
        'CA' => 'Canada',
        'CD' => 'Democratic Republic of the Congo',
        'CF' => 'Central African Republic',
        'CG' => 'Congo',
        'CH' => 'Switzerland',
        'CI' => "Côte d'Ivoire",
        'CK' => 'Cook Islands',
        'CL' => 'Chile',
        'CM' => 'Cameroon',
        'CN' => 'China',
        'CO' => 'Colombia',
        'CR' => 'Costa Rica',
        'CS' => 'Czechoslovakia',
        'CU' => 'Cuba',
        'CV' => 'Cape Verde',
        'CY' => 'Cyprus',
        'CZ' => 'Czech Republic',
        'DD' => 'Germany (Democratic Republic)',
        'DE' => 'Germany',
        'DJ' => 'Djibouti',
        'DK' => 'Denmark',
        'DL' => 'Germany (Democratic Republic)',
        'DM' => 'Dominica',
        'DO' => 'Dominican Republic',
        'DZ' => 'Algeria',
        'EA' => 'Eurasian Patent Organization',
        'EC' => 'Ecuador',
        'EE' => 'Estonia',
        'EG' => 'Egypt',
        'EH' => 'Western Sahara',
        'EM' => 'Office for Harmonization in the Internal Market',
        'EP' => 'European Patent Office',
        'ER' => 'Eritrea',
        'ES' => 'Spain',
        'ET' => 'Ethiopia',
        'FI' => 'Finland',
        'FJ' => 'Fiji',
        'FK' => 'Falkland Islands (Malvinas)',
        'FO' => 'Faroe Islands',
        'FR' => 'France',
        'GA' => 'Gabon',
        'GB' => 'United Kingdom',
        'GC' => 'Patent Office of the Cooperation Council for the Arab States of the Gulf',
        'GD' => 'Grenada',
        'GE' => 'Georgia',
        'GG' => 'Guernsey',
        'GH' => 'Ghana',
        'GI' => 'Gibraltar',
        'GL' => 'Greenland',
        'GM' => 'Gambia',
        'GN' => 'Guinea',
        'GQ' => 'Equatorial Guinea',
        'GR' => 'Greece',
        'GS' => 'South Georgia and the South Sandwich Islands',
        'GT' => 'Guatemala',
        'GW' => 'Guinea-Bissau',
        'GY' => 'Guyana',
        'HK' => "The Hong Kong Special Administrative Region of the People's Republic of China",
        'HN' => 'Honduras',
        'HR' => 'Croatia',
        'HT' => 'Haiti',
        'HU' => 'Hungary',
        'IB' => 'International Bureau of the World Intellectual Property Organization',
        'ID' => 'Indonesia',
        'IE' => 'Ireland',
        'IL' => 'Israel',
        'IM' => 'Isle of Man',
        'IN' => 'India',
        'IQ' => 'Iraq',
        'IR' => 'Iran (Islamic Republic of)',
        'IS' => 'Iceland',
        'IT' => 'Italy',
        'JE' => 'Jersey',
        'JM' => 'Jamaica',
        'JO' => 'Jordan',
        'JP' => 'Japan',
        'KE' => 'Kenya',
        'KG' => 'Kyrgyzstan',
        'KH' => 'Cambodia',
        'KI' => 'Kiribati',
        'KM' => 'Comoros',
        'KN' => 'Saint Kitts and Nevis',
        'KP' => "Democratic People's Republic of Korea",
        'KR' => 'Republic of Korea',
        'KW' => 'Kuwait',
        'KY' => 'Cayman Islands',
        'KZ' => 'Kazakhstan',
        'LA' => "Lao People's Democratic Republic",
        'LB' => 'Lebanon',
        'LC' => 'Saint Lucia',
        'LI' => 'Liechtenstein',
        'LK' => 'Sri Lanka',
        'LR' => 'Liberia',
        'LS' => 'Lesotho',
        'LT' => 'Lithuania',
        'LU' => 'Luxembourg',
        'LV' => 'Latvia',
        'LY' => 'Libyan Arab Jamahiriya',
        'MA' => 'Morocco',
        'MC' => 'Monaco',
        'MD' => 'Republic of Moldova',
        'ME' => 'Montenegro',
        'MG' => 'Madagascar',
        'MK' => 'The former Yugoslav Republic of Macedonia',
        'ML' => 'Mali',
        'MM' => 'Myanmar',
        'MN' => 'Mongolia',
        'MO' => 'Macao',
        'MP' => 'Northern Mariana Islands',
        'MR' => 'Mauritania',
        'MS' => 'Montserrat',
        'MT' => 'Malta',
        'MU' => 'Mauritius',
        'MV' => 'Maldives',
        'MW' => 'Malawi',
        'MX' => 'Mexico',
        'MY' => 'Malaysia',
        'MZ' => 'Mozambique',
        'NA' => 'Namibia',
        'NE' => 'Niger',
        'NG' => 'Nigeria',
        'NI' => 'Nicaragua',
        'NL' => 'Netherlands',
        'NO' => 'Norway',
        'NP' => 'Nepal',
        'NR' => 'Nauru',
        'NZ' => 'New Zealand',
        'OA' => 'African Intellectual Property Organization',
        'OM' => 'Oman',
        'PA' => 'Panama',
        'PE' => 'Peru',
        'PG' => 'Papua New Guinea',
        'PH' => 'Philippines',
        'PK' => 'Pakistan',
        'PL' => 'Poland',
        'PT' => 'Portugal',
        'PW' => 'Palau',
        'PY' => 'Paraguay',
        'QA' => 'Qatar',
        'QZ' => 'Community Plant Variety Office (European Community) (CPVO)',
        'RO' => 'Romania',
        'RS' => 'Serbia',
        'RU' => 'Russian Federation',
        'RW' => 'Rwanda',
        'SA' => 'Saudi Arabia',
        'SB' => 'Solomon Islands',
        'SC' => 'Seychelles',
        'SD' => 'Sudan',
        'SE' => 'Sweden',
        'SG' => 'Singapore',
        'SH' => 'Saint Helena',
        'SI' => 'Slovenia',
        'SK' => 'Slovakia',
        'SL' => 'Sierra Leone',
        'SM' => 'San Marino',
        'SN' => 'Senegal',
        'SO' => 'Somalia',
        'SR' => 'Suriname',
        'ST' => 'Sao Tome and Principe',
        'SU' => 'Soviet Union',
        'SV' => 'El Salvador',
        'SY' => 'Syrian Arab Republic',
        'SZ' => 'Swaziland',
        'TC' => 'Turks and Caicos Islands',
        'TD' => 'Chad',
        'TG' => 'Togo',
        'TH' => 'Thailand',
        'TJ' => 'Tajikistan',
        'TL' => 'Timor-Leste',
        'TM' => 'Turkmenistan',
        'TN' => 'Tunisia',
        'TO' => 'Tonga',
        'TR' => 'Turkey',
        'TT' => 'Trinidad and Tobago',
        'TV' => 'Tuvalu',
        'TW' => 'Taiwan, Province of China',
        'TZ' => 'United Republic of Tanzania',
        'UA' => 'Ukraine',
        'UG' => 'Uganda',
        'US' => 'United States of America',
        'UY' => 'Uruguay',
        'UZ' => 'Uzbekistan',
        'VA' => 'Holy See',
        'VC' => 'Saint Vincent and the Grenadines',
        'VE' => 'Venezuela',
        'VG' => 'Virgin Islands (British)',
        'VN' => 'Viet Nam',
        'VU' => 'Vanuatu',
        'WO' => 'World Intellectual Property Organization',
        'WS' => 'Samoa',
        'YD' => 'Yemen (Democratic)',
        'YE' => 'Yemen',
        'ZA' => 'South Africa',
        'ZM' => 'Zambia',
        'ZW' => 'Zimbabwe',
    );
    # alphabetical by country
    # Afghanistan _ AF
    # African Intellectual Property Organization _ OA
    # African Regional Intellectual Property Organization _ AP
    # Albania _ AL
    # Algeria _ DZ
    # Angola _ AO
    # Anguilla _ AI
    # Antigua and Barbuda _ AG
    # Argentina _ AR
    # Armenia _ AM
    # Aruba _ AW
    # Australia _ AU
    # Austria _ AT
    # Azerbaijan _ AZ
    # Bahamas _ BS
    # Bahrain _ BH
    # Bangladesh _ BD
    # Barbados _ BB
    # Belarus _ BY
    # Belgium _ BE
    # Belize _ BZ
    # Benelux Trademark Office _ BX
    # Benin _ BJ
    # Bermuda _ BM
    # Bhutan _ BT
    # Bolivia _ BO
    # Bosnia and Herzegovina _ BA
    # Botswana _ BW
    # Bouvet Island _ BV
    # Brazil _ BR
    # Brunei Darussalam _ BN
    # Bulgaria _ BG
    # Burkina Faso _ BF
    # Burundi _ BI
    # Cambodia _ KH
    # Cameroon _ CM
    # Canada _ CA
    # Cape Verde _ CV
    # Cayman Islands _ KY
    # Central African Republic _ CF
    # Chad _ TD
    # Chile _ CL
    # China _ CN
    # Colombia _ CO
    # Community Plant Variety Office (European Community) (CPVO) _ QZ
    # Comoros _ KM
    # Congo _ CG
    # Cook Islands _ CK
    # Costa Rica _ CR
    # Croatia _ HR
    # Cuba _ CU
    # Cyprus _ CY
    # Czech Republic _ CZ
    # Czechoslovakia _ CS
    # Côte d'Ivoire _ CI
    # Democratic People's Republic of Korea _ KP
    # Democratic Republic of the Congo _ CD
    # Denmark _ DK
    # Djibouti _ DJ
    # Dominica _ DM
    # Dominican Republic _ DO
    # Ecuador _ EC
    # Egypt _ EG
    # El Salvador _ SV
    # Equatorial Guinea _ GQ
    # Eritrea _ ER
    # Estonia _ EE
    # Ethiopia _ ET
    # Eurasian Patent Organization _ EA
    # European Patent Office _ EP
    # Falkland Islands (Malvinas) _ FK
    # Faroe Islands _ FO
    # Fiji _ FJ
    # Finland _ FI
    # France _ FR
    # Gabon _ GA
    # Gambia _ GM
    # Georgia _ GE
    # Germany _ DE
    # Germany (Democratic Republic) _ DD
    # Germany (Democratic Republic) _ DL
    # Ghana _ GH
    # Gibraltar _ GI
    # Greece _ GR
    # Greenland _ GL
    # Grenada _ GD
    # Guatemala _ GT
    # Guernsey _ GG
    # Guinea _ GN
    # Guinea-Bissau _ GW
    # Guyana _ GY
    # Haiti _ HT
    # Holy See _ VA
    # Honduras _ HN
    # Hungary _ HU
    # Iceland _ IS
    # India _ IN
    # Indonesia _ ID
    # International Bureau of the World Intellectual Property Organization _ IB
    # Iran (Islamic Republic of) _ IR
    # Iraq _ IQ
    # Ireland _ IE
    # Isle of Man _ IM
    # Israel _ IL
    # Italy _ IT
    # Jamaica _ JM
    # Japan _ JP
    # Jersey _ JE
    # Jordan _ JO
    # Kazakhstan _ KZ
    # Kenya _ KE
    # Kiribati _ KI
    # Kuwait _ KW
    # Kyrgyzstan _ KG
    # Lao People's Democratic Republic _ LA
    # Latvia _ LV
    # Lebanon _ LB
    # Lesotho _ LS
    # Liberia _ LR
    # Libyan Arab Jamahiriya _ LY
    # Liechtenstein _ LI
    # Lithuania _ LT
    # Luxembourg _ LU
    # Macao _ MO
    # Madagascar _ MG
    # Malawi _ MW
    # Malaysia _ MY
    # Maldives _ MV
    # Mali _ ML
    # Malta _ MT
    # Mauritania _ MR
    # Mauritius _ MU
    # Mexico _ MX
    # Monaco _ MC
    # Mongolia _ MN
    # Montenegro _ ME
    # Montserrat _ MS
    # Morocco _ MA
    # Mozambique _ MZ
    # Myanmar _ MM
    # Namibia _ NA
    # Nauru _ NR
    # Nepal _ NP
    # Netherlands _ NL
    # Netherlands Antilles _ AN
    # New Zealand _ NZ
    # Nicaragua _ NI
    # Niger _ NE
    # Nigeria _ NG
    # Northern Mariana Islands _ MP
    # Norway _ NO
    # Office for Harmonization in the Internal Market _ EM
    # Oman _ OM
    # Pakistan _ PK
    # Palau _ PW
    # Panama _ PA
    # Papua New Guinea _ PG
    # Paraguay _ PY
    # Patent Office of the Cooperation Council for the Arab States of the Gulf _ GC
    # Peru _ PE
    # Philippines _ PH
    # Poland _ PL
    # Portugal _ PT
    # Qatar _ QA
    # Republic of Korea _ KR
    # Republic of Moldova _ MD
    # Romania _ RO
    # Russian Federation _ RU
    # Rwanda _ RW
    # Saint Helena _ SH
    # Saint Kitts and Nevis _ KN
    # Saint Lucia _ LC
    # Saint Vincent and the Grenadines _ VC
    # Samoa _ WS
    # San Marino _ SM
    # Sao Tome and Principe _ ST
    # Saudi Arabia _ SA
    # Senegal _ SN
    # Serbia _ RS
    # Seychelles _ SC
    # Sierra Leone _ SL
    # Singapore _ SG
    # Slovakia _ SK
    # Slovenia _ SI
    # Solomon Islands _ SB
    # Somalia _ SO
    # South Africa _ ZA
    # South Georgia and the South Sandwich Islands _ GS
    # Soviet Union _ SU
    # Spain _ ES
    # Sri Lanka _ LK
    # Sudan _ SD
    # Suriname _ SR
    # Swaziland _ SZ
    # Sweden _ SE
    # Switzerland _ CH
    # Syrian Arab Republic _ SY
    # Taiwan, Province of China _ TW
    # Tajikistan _ TJ
    # Thailand _ TH
    # The Hong Kong Special Administrative Region of the People's Republic of China _ HK
    # The former Yugoslav Republic of Macedonia _ MK
    # Timor-Leste _ TL
    # Togo _ TG
    # Tonga _ TO
    # Trinidad and Tobago _ TT
    # Tunisia _ TN
    # Turkey _ TR
    # Turkmenistan _ TM
    # Turks and Caicos Islands _ TC
    # Tuvalu _ TV
    # Uganda _ UG
    # Ukraine _ UA
    # United Arab Emirates _ AE
    # United Kingdom _ GB
    # United Republic of Tanzania _ TZ
    # United States of America _ US
    # Uruguay _ UY
    # Uzbekistan _ UZ
    # Vanuatu _ VU
    # Venezuela _ VE
    # Viet Nam _ VN
    # Virgin Islands (British) _ VG
    # Western Sahara _ EH
    # World Intellectual Property Organization _ WO
    # Yemen _ YE
    # Yemen (Democratic) _ YD
    # Zambia _ ZM
    # Zimbabwe _ ZW
}
1; #this line is important and will help the module return a true value
__END__
=head1 NAME
WWW::Patent::Page - get patent documents
from WWW source (e.g.
( not available: JP->Eng translations in HTML from JPO,)
complete US applications and grants from
(USPTO), and place into a WWW::Patent::Page::Response object)
(note: ESPACE_EP not provided due to captcha use..)
=head1 VERSION
This document describes WWW::Patent::Page version 0.109.0 of January, 2012.
=head1 SYNOPSIS
Please see the test suite for working examples in t/ . The following is not guaranteed to be working or up-to-date.
THE ONLY OFFICE CURRENTLY WORKING IS THE USPTO.
$ perl -I. -MWWW::Patent::Page -e 'print $WWW::Patent::Page::VERSION,"\n"'
0.02
$ perl get_patent.pl US6123456 > US6123456.pdf &
$ perl -wT get_JPO_patent_translation_to_english.pl "JPH09-123456A" > JPH09-123456A.zip &
( see examples/JPH09-123456A.zip for an html formatted, machine translated, Japanese patent document. )
(command line interfaces are included in examples/ )
http://www.yourdomain.com/www_get_patent_pdf.pl
http://www.yourdomain.com/www_get_JPO_patent_translation_to_english.pl
(web fetchers are included in examples/ )
Typical usage in perl code:
use WWW::Patent::Page;
print $WWW::Patent::Page::VERSION,"\n";
my $patent_browser = WWW::Patent::Page->new(); # new object
my $document1 = $patent_browser->get_page('6,123,456');
# defaults:
# country => 'US',
# format => 'pdf',
# page => undef ,
# and usual defaults of LWP::UserAgent (subclassed)
my $document2 = $patent_browser->get_page('US6123456',
format => 'pdf',
page => 2 , #get only the second page
);
my $pages_known = $document2->get_parameter('pages'); #how many total pages known?
=head1 DESCRIPTION
Intent: Use public sources to retrieve patent documents such as
TIFF images of patent pages, html of patents, pdf, etc.
Expandable for your office of interest by writing new submodules..
Alpha release by newbie to find if there is any interest
=head1 USAGE
See also SYNOPSIS above
Standard process for building & installing modules:
perl Build.PL
./Build
./Build test verbose=1
./Build install
or
perl Makefile.PL
make
make test TEST_VERBOSE=1
make install
or on ActiveState or otherwise using nmake
perl Makefile.PL
nmake
nmake test TEST_VERBOSE=1
nmake install
Examples of use:
$patent_browser = WWW::Patent::Page->new(
doc_id => 'US6,654,321',
format => 'pdf',
page => undef , # returns all pages in one pdf
agent => 'Mozilla/5.0 (Windows; U; Windows NT 5.0; en-US; rv:1.4b) Gecko/20030516 Mozilla Firebird/0.6',
);
$patent_response = $patent_browser->get_page('US6,654,321(B2)issued_2_Okada');
=head1 INTERFACE
Object oriented, and modelled on LWP.
=head1 SUBROUTINES/METHODS
=head2 new
NEW instance of the Page class, subclassing LWP::UserAgent
=cut
=head2 login
login to a server to use its services; obtain a token or session id or the like
=cut
=head2 country_known
country_known maps the known two letter acronyms to patenting entities, usually countries; country_known returns undef if the two letter acronym is not recognized.
=cut
=head2 parse_doc_id
Takes a human readable patent/publication identifier and parses it into country/entity, kind, number, doc_type, ...
CC[TY]##,###,###(K#)_Comments
US_6,123,456_A1_-comments
CC : Two letter country/entity code; e.g. US, EP, WO
TY : Type of document; one or two letters only of these choices:
e.g. in US, Type = Utility is the default and no type code is used, e.g. US6123456
D : Design, e.g. USD339,456
PP: Plant, e.g. USPP8,901
RE: Reissue, e.g. USRE35,312
T : Defensive Publication, e.g. UST109,201
H : Statutory Invention Registration (SIR), e.g. USH1,523
##,###,### Document number (e.g. patent number or application number- only digits and optionally separators, no letters)
K# : the kind or version number, e.g. A1, B2, etc.; placed in parenthesis- at least one letter and at most one number. Not always used in document fetching.
Comments: retained but not used- single string of word characters \w = A-Za-z0-9_ (no spaces, "-", commas, etc.)
Separators (comma, space, dash, underscore) may occur between entries, and at least one MUST occur before a comment (due to difficulty of parsing the kind code which might be one letter).
Separators (the comma is handy) may occur within the number
As of version 0.1, the parsed result used at the office of choice is placed in
$self->patent->doc_id_standardized
A convenience value of
$self->patent->doc_id_commified
is provided.
In recognizing the values such as CC country, the priority is:
$self->patent->doc_id as supplied; if absent:
$self->patent->country; if absent:
$WWW::Patent::Page::default_country
=cut
=head2 get_page
method to use the modules specific to Offices like USPTO, with methods for each document/page format, etc., and
LWP::UserAgent to grab the appropriate URLs and if necessary build the response content or produce error values
=cut
=head2 request
Method to override the LWP::UserAgent::request that gets a URL.
This calls LWP::UserAgent::request itself, but around it adds things like a retry (and possibly debugging, like throwing pages to a browser for display).
=cut
=head2 terms
method to provide a summary or pointers to the terms and conditions of use of the publicly available databases
=head2 _load_modules
internal private method to access helper modules in WWW::Patent::Page
=cut
=head2 _agent
private method to assign default agent
=cut
=head2 _load_country_known
private method to load a big hash and allow it to be folded during code development.
=cut
=head1 DIAGNOSTICS
The accepted tactic is to set $self->{'is_success'} or $self->{'patent'}->{'is_success'} to false and add a message to $self->{'message'} or $self->{'patent'}->{'message'}
=head1 CONFIGURATION AND ENVIRONMENT
WWW::Patent::Page requires no configuration files or environment variables.
WWW::Patent::Page makes use of LWP environmental variables such as HTTP_PROXY.
=head1 DEPENDENCIES
LWP::UserAgent
HTTP::Response
=head1 INCOMPATIBILITIES
None reported.
=head1 BUGS AND LIMITATIONS
Code contributions, suggestions, and critiques are welcome.
Error handling is undeveloped.
By definition, a non-trivial program contains bugs.
For United States Patents (US) via the USPTO (USPTO), the 'kind' is ignored in method provide_doc
=head1 AUTHOR
Wanda B. Anon
Wanda.B.Anon@gmail.com
=head1 LICENSE AND COPYRIGHT
Copyright (c) 2008, Wanda B. Anon wanda.b.anon@GMAIL.com .
All rights reserved.
This program is free software; you can redistribute
it and/or modify it under the Artistic License version 2.0
or above ( http://www.perlfoundation.org/artistic_license_2_0 ) .
=head1 ACKNOWLEDGEMENTS
Hermann Schier, Lokkju, Andy Lester,
the authors of Finance::Quote, Erik Oliver for patentmailer, Howard P. Katseff of AT&T Laboratories for wsp.pl, version 2,
a proxy that speaks LWP and understands proxies, and of course Larry and Randal and the gang.
=head1 DISCLAIMER OF WARRANTY
BECAUSE THIS SOFTWARE IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
FOR THE SOFTWARE, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
PROVIDE THE SOFTWARE "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER
EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE
ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE SOFTWARE IS WITH
YOU. SHOULD THE SOFTWARE PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL
NECESSARY SERVICING, REPAIR, OR CORRECTION.
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
REDISTRIBUTE THE SOFTWARE AS PERMITTED BY THE ABOVE LICENCE, BE
LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL,
OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE
THE SOFTWARE (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING
RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A
FAILURE OF THE SOFTWARE TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF
SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
SUCH DAMAGES.