package Business::LCCN;
use 5.6.1;
use Carp qw( carp );
use Moose;
use Moose::Util::TypeConstraints;
use Scalar::Util qw( blessed );
use URI;
use strict;
use warnings;
=head1 NAME
Business::LCCN - Work with Library of Congress Control Number (LCCN) codes
=head1 VERSION
Version 1.01
=cut
our $VERSION = '1.01';
=head1 SYNOPSIS
Work with Library of Congress Control Number (LCCN) codes.
use Business::LCCN;
my $lccn = Business::LCCN->new('he 68001993 /HE/r692');
if ($lccn) {
# parse LCCN (common fields)
print 'Prefix ', $lccn->prefix, "\n"; # "he"
print 'Prefix field ', $lccn->prefix_encoded, "\n"; # "he "
print 'Year cataloged ', $lccn->year_cataloged, "\n"; # 1968
print 'Year field ', $lccn->year_encoded, "\n"; # "68"
print 'Serial ', $lccn->serial, "\n"; # "001993"
# stringify LCCN:
# canonical format: "he 68001993 /HE/r692"
print 'Canonical ', $lccn->canonical, "\n";
# simple normalized format: "he68001993"
print 'Normalized ', $lccn->normalized,"\n";
# info: URI: "info:lccn:he68001993"
print 'Info URI ', $lccn->info_uri, "\n";
# lccn.loc.gov permalink: "http://lccn.loc.gov/he68001993"
print 'Permalink ', $lccn->permalink,"\n";
# parse LCCN (uncommon fields)
print 'LCCN Type ', $lccn->lccn_structure, "\n"; # "A" or "B"
print 'Suffix field ', $lccn->suffix_encoded, \n"; # "/HE"
print 'Suffix parts ', $lccn->suffix_alphabetic_identifiers,
"\n"; # ("HE")
print 'Rev year', $lccn->revision_year, "\n"; # 1969
print 'Rev year field ',$lccn->revision_year_encoded,
"\n"; # "69"
print 'Rev number ', $lccn->revision_number,"\n"; # 2
} else {
print " Error : Invalid LCCN \n ";
}
=cut
use overload
'==' => \&_overload_equality,
'eq' => \&_overload_equality,
'""' => \&_overload_string;
subtype 'LCCN_Year' => as 'Int' => where { $_ >= 1898 };
subtype 'LCCN_Serial' => as 'Str' => where {m/^\d{6}$/};
enum 'LCCN_Structure' => qw( A B );
# normalize syntax at http://www.loc.gov/marc/lccn-namespace.html
subtype 'LCCN_Normalized' => as 'Str' =>
where {m/^(?:[a-z](?:[a-z](?:[a-z]|\d{2})?|\d\d)?|\d\d)?\d{8}$/};
subtype 'URI' => as 'Object' => where { $_->isa('URI') };
coerce 'URI' => from 'Str' => via { URI->new($_) };
has 'original' => ( is => 'ro', isa => 'Maybe[Str]', required => 1 );
has 'lccn_structure' =>
( is => 'ro', isa => 'LCCN_Structure', required => 1 );
has 'year_encoded' => ( is => 'ro', isa => 'Str', required => 1 );
has 'year_cataloged' =>
( is => 'ro', isa => 'Maybe[LCCN_Year]', required => 0 );
has 'prefix' => ( is => 'ro', isa => 'Str', required => 1 );
has 'prefix_encoded' => ( is => 'ro', isa => 'Str', required => 1 );
has 'serial' => ( is => 'ro', isa => 'LCCN_Serial', required => 1 );
has 'suffix_encoded' =>
( is => 'ro', isa => 'Str', required => 1, default => '' );
has 'suffix_alphabetic_identifiers' => (
is => 'ro',
isa => 'ArrayRef[Str]',
lazy => 1,
default => sub { _suffix_alphabetic_identifiers(@_) },
);
has 'revision_year' => ( is => 'ro', isa => 'Maybe[Int]', required => 0 );
has 'revision_year_encoded' =>
( is => 'ro', isa => 'Str', required => 1, default => '' );
has 'revision_number' => ( is => 'ro', isa => 'Maybe[Int]', required => 0 );
has 'canonical' => ( is => 'ro',
isa => 'Str',
lazy => 1,
default => sub { _canonical(@_) },
);
has 'normalized' => ( is => 'ro',
isa => 'LCCN_Normalized',
lazy => 1,
default => sub { _normalized(@_) },
);
has 'permalink' => ( is => 'ro',
isa => 'URI',
lazy => 1,
default => sub { _permalink(@_) }
);
has 'info_uri' => ( is => 'ro',
isa => 'URI',
lazy => 1,
default => sub { _info_uri(@_) }
);
around 'new' => sub {
my ( $next, $self, $input, $options ) = @_;
unless ( $options and ref $options and ref $options eq 'HASH' ) {
$options = {};
}
my $emit_warnings = !$options->{no_warnings};
if ( !defined $input ) {
carp q{Received an undefined value as LCCN input.} if $emit_warnings;
return;
} elsif ( !length $input ) {
carp q{Received an empty string as LCCN input.} if $emit_warnings;
return;
} else {
my %out = ( original => $input );
# clean up any leading or trailing whitespace
$input =~ s/^\s+|\s+$//g;
# accept permalinks
$input =~ s{^http://lccn.loc.gov/}{};
# accept info: uris
$input =~ s{^info:lccn/}{};
# try LCCN structure B
if ($input =~ m{
^
([a-zA-Z\s]{0,2}) # 2-letter alphabetic prefix
\s? # space, not officially allowed
([2-9]\d\d\d) # 4-letter year
(?:
-(\d{1,6}) # hyphen plus 1-6 digit serial number
| # or...
(\d{6}) # 6 digit serial number
)
$ }x
) {
$out{lccn_structure} = 'B';
$out{prefix_encoded} = $1;
$out{year_encoded} = $2;
$out{serial} = ( defined $3 ? $3 : $4 );
$out{year_cataloged} = $out{year_encoded};
# try LCCN structure A
} elsif (
$input =~ m{
^
([a-zA-Z\s]{0,3}) # 3-letter alphabetic prefix
(\d\d) # 2-letter year
(?:
-(\d{1,6}) # hyphen plus 1-6 digit serial number
| # or...
(\d{6}) # 6 digit serial number
)
(?:
(?:\s|(?!\d)) # blank for supplement
(/[A-Z]{1,3})* # suffix/alphabetic identifiers
(?://?
r(\d\d) # revision year encoded
(\d*))? # revision number
)?
$ }x
) {
$out{lccn_structure} = 'A';
$out{prefix_encoded} = $1;
$out{year_encoded} = $2;
$out{serial} = ( defined $3 ? $3 : $4 );
$out{suffix_encoded} = ( defined($5) ? $5 : '' );
$out{revision_year_encoded} = $6;
$out{revision_number} = ( $7 || undef );
# per http://www.loc.gov/marc/marbi/dp/dp84.html and
# http://en.wikipedia.org/wiki/Library_of_Congress_Control_Number,
# the first LCCNs were assigned in 1898, and there were fewer than
# 8000 LCCns issued each of those years
if ( $out{year_encoded} eq '98' ) {
if ( $out{serial} < 3000 ) {
$out{year_cataloged} = 1898;
} else {
$out{year_cataloged} = 1998;
}
} elsif ( $out{year_encoded} eq '99' ) {
if ( $out{serial} < 6000 ) {
$out{year_cataloged} = 1899;
} else {
$out{year_cataloged} = 1999;
}
} elsif ( $out{year_encoded} eq '00' ) {
if ( $out{serial} < 8000 ) {
$out{year_cataloged} = 1900;
} else {
$out{year_cataloged} = 2000;
}
} elsif ( $out{year_encoded} eq '50' ) {
$out{lccn_externally_created_flag} = 1; # zzz
} elsif ( $out{year_encoded} =~ m/^7\d$/ ) {
if ( _verify_7_checksum( $out{year_encoded}, $out{serial} ) )
{
$out{lccn_structure_series} = 7;
} else {
$out{year_cataloged} = $out{year_encoded} + 1900;
}
} else {
$out{year_cataloged} = $out{year_encoded} + 1900;
}
if ( defined $out{revision_year_encoded}
and length $out{revision_year_encoded} ) {
if ( $out{revision_year_encoded} == 98
or $out{revision_year_encoded} == 99 ) {
$out{revision_year} = $out{revision_year_encoded} + 1800;
} else {
$out{revision_year} = $out{revision_year_encoded} + 1900;
}
}
} else {
if ( $input !~ m/\d\d/ ) {
carp
qq{LCCN input "$input" doesn't contain enough numbers. Please check the input and try again.}
if $emit_warnings;
} elsif ( $input =~ m/^\s*(0(?:01|10))\b/ ) {
carp
qq{LCCN input "$input" starts with "$1", suggesting you've copied in part of a MARC record. Please remove MARC record formatting from the LCCN.}
if $emit_warnings;
} elsif ( $input =~ m/^\s*(\$[ab])\b/ ) {
carp
qq{LCCN $input "input" starts with "$1", suggesting you've copied in part of a MARC record. Please remove MARC record formatting from the LCCN.}
if $emit_warnings;
} elsif ( $input =~ m/#/ ) {
carp
qq{LCCN input "$input" contains "#" characters, which are sometimes used as placeholders for spaces Please remove the "#" characters from the LCCN input.}
if $emit_warnings;
} elsif ( $input =~ m/^\s*(_[a-z])\b\s*/ ) {
carp
qq{LCCN input "$input" starts with "$1", which may be MARC formatting. Please remove any such formatting from the LCCN.}
if $emit_warnings;
} else {
carp qq{LCCN input "$input" cannot be parsed.}
if $emit_warnings;
}
return;
}
my $req_prefix_length = ( $out{lccn_structure} eq 'A' ? 3 : 2 );
# fixup serial
$out{serial} = sprintf '%06i', $out{serial};
# fixup prefix
if ( defined $out{prefix_encoded} ) {
$out{prefix_encoded} =~ s/^\s+|\s+$//;
$out{prefix_encoded} = lc $out{prefix_encoded};
unless ( length $out{prefix_encoded} == $req_prefix_length ) {
$out{prefix_encoded} .= ' '
x ( $req_prefix_length - length $out{prefix_encoded} );
}
$out{prefix} = $out{prefix_encoded};
$out{prefix} =~ s/\s+//g;
}
# fixup suffix
if ( !defined $out{suffix_encoded} ) {
$out{suffix_encoded} = '';
}
# fixup revision year
if ( !defined $out{revision_year_encoded} ) {
$out{revision_year_encoded} = '';
}
$next->( $self, \%out );
}
};
sub _canonical {
my $self = shift;
if ( $self->lccn_structure eq 'B' ) {
return
sprintf( "%- 2s%4i%06i",
$self->prefix, $self->year_encoded, $self->serial );
} elsif ( $self->lccn_structure eq 'A' ) {
my $string =
sprintf( "%- 3s%02i%06i %s",
$self->prefix, $self->year_encoded,
$self->serial, $self->suffix_encoded
);
if ( length $self->revision_year_encoded ) {
if ( !length $self->suffix_encoded ) {
$string .= '/';
}
$string .= '/r' . $self->revision_year_encoded;
if ( $self->revision_number ) {
$string .= $self->revision_number;
}
}
return $string;
} else { # should never get here
return '';
}
}
no Moose; # remove Moose keywords
# normalize documented at http://www.loc.gov/marc/lccn-namespace.html
# and http://lccn.loc.gov/lccnperm-faq.html
sub _normalized {
my $self = shift;
my $string = join '', $self->prefix, $self->year_encoded, $self->serial;
$string =~ s/[\s-]//g;
return $string;
}
# permalink syntax documented at http://lccn.loc.gov/lccnperm-faq.html
sub _permalink {
my $self = shift;
return URI->new( 'http://lccn.loc.gov/' . $self->normalized );
}
# info: uri syntax documented at http://www.loc.gov/standards/uri/info.html
sub _info_uri {
my $self = shift;
return URI->new( 'info:lccn/' . $self->normalized );
}
sub _overload_string {
my $self = shift;
return $self->canonical;
}
sub _overload_equality {
my ( $self, $other ) = @_;
my $other_lccn;
if ( ref($other) and blessed($other) and $other->isa('Business::LCCN') ) {
$other_lccn = $other;
} else {
$other_lccn = new Business::LCCN($other);
}
if ( !defined $other_lccn ) {
return 0;
} else {
return ( $self->normalized eq $other_lccn->normalized );
}
}
# returns a list of all the suffix alphabetic identifiers
sub _suffix_alphabetic_identifiers {
my $self = shift;
if ( length $self->{suffix_encoded} ) {
my @identifiers = $self->suffix_encoded =~ m{\b([A-Z]+)\b};
return \@identifiers;
} else {
return [];
}
}
sub _verify_7_checksum {
my ( $year_encoded, $serial ) = @_;
unless ( $year_encoded =~ m/^\d{2}$/
and $serial =~ m/^\d{6}$/ ) {
return 0;
}
my @year_digits = split //, $year_encoded;
my @serial_digits = split //, $serial;
my $product
= $year_digits[0] * 7
+ $year_digits[1] * 8
+ $serial_digits[0] * 4
+ $serial_digits[1] * 6
+ $serial_digits[2] * 3
+ $serial_digits[3] * 5
+ $serial_digits[4] * 2
+ $serial_digits[5] * 1;
if ( $product % 11 == 0 ) {
return 1;
} else {
return 0;
}
}
=head1 INTERFACE
=head2 Methods
=head3 C<new>
The new method takes a single encoded LCCN string, in a variety of
formats -- with or without hyphens, with proper spacing or without.
Examples:
"89-1234", "89-001234", "89001234", "2002-1234", "2002-001234",
"2002001234", " 89001234 ", " 2002001234", "a89-1234",
"a89-001234", "a89001234", "a2002-1234", "a2002-001234",
"a2002001234", "a 89001234 ", "a 2002001234", "ab98-1234",
"ab98-001234", "ab98001234", "ab2002-1234", "ab2002-001234",
"ab2002001234", "ab 98001234 ", "ab 2002001234", "abc89-1234",
"abc89-001234", "abc89001234", "abc89001234 ", permalinks URLs
like "http://lccn.loc.gov/2002001234" and info URIs like
"info:lccn/2002001234"
Returns a Business::LCCN object, or undef if the string can't be
parsed as a valid LCCN. If the string can't be parsed, C<new> will
warn with a diagnostic message explaining why the string was invalid.
C<new> can also take an optional hashref of options as a second
parameter. The only option supported is C<no_warnings>, which will
disable any diagnostic warnings explaining why a candidate LCCN string
was invalid:
# returns undef, issues warning about input not containing any digits
$foo = LCCN->new('x');
# returns undef, but does not issue any additional warning
$bar = LCCN->new( 'x', { no_warnings => 1 } );
=head3 LCCN attributes
=head3 C<lccn_structure>
LCCN structure type, either "A" (issued 1898-2000) or "B" (issued
2001-).
=head3 C<prefix>
LCCN's alphabetic prefix, 1-3 characters long. Returns an empty string
if LCCN has no prefix.
=head3 C<prefix_encoded>
The prefix as encoded, either two (structure A) or three (structure B)
characters long, space-padded.
=head3 C<year_cataloged>
The year a book was cataloged. Returns an undef in cases where the
cataloging year in unclear. For example, LCCN S<" 75425165 //r75">
has a cataloged year of 1975.
=head3 C<year_encoded>
A two (structure A) or four (structure B) digit string typically
representing the year the book was cataloged, but sometimes serving as
a checksum, or a source code. For example, LCCN S<" 75425165 //r75">
has an encoded year field of S<"75">.
=head3 C<serial>
A six-digit number zero-padded serial number. For example, LCCN
S<" 75425165 //r75"> has a serial number of S<"425165">.
=head3 C<suffix_alphabetic_identifiers>
Structure A LCCNs can include one or more 1-3 character
suffix/alphabetic identifiers. Returns a list of all identifiers
present. For example, for LCCN S<" 79139101 /AC/MN">,
suffix_alphabetic_identifiers returns ('AC', 'MN').
=head3 C<suffix_encoded>
The LCCN's suffix/alphabetic identifier field, as encoded in the LCCN.
Returns an empty string if no suffix present.
=head3 C<revision_year>
Structure A LCCNs can include a revision date in their
bibliographic records. Returns the four-digit year the record was
revised, or undef if not present. For example, LCCN
S<" 75425165 //r75"> has a revision year of 1975.
=head3 C<revision_year_encoded>
The two-letter revision date, as encoded in structure A LCCNs. Returns
an empty string if no revision year present. For example, LCCN
S<" 75425165 //r75"> has a revision year of C<"75">.
=head3 C<revision_number>
Some structure A LCCNs have a revision year and number,
representing the number of times the record has been revised. For
example, LCCN S<" 75425165 //r752"> has revision_number 2. Returns
undef if not present.
=head3 LCCN representations
=head3 C<canonical>
Returns the canonical 12+ character default representation of an
LCCN. For example, S<" 85000002 "> is the canonical representation of
S<"85000002">, S<"85-000002">, S<"85-2">, S<" 85000002">.
=head3 C<normalized>
Returns the normalized 9-12 character representation of an LCCN.
Normalized LCCNs are often used in URIs and Internet-era
representations. For example, S<"n2001050268"> is the normalized
representation of S<"n 85-000002 ">, S<"n85-2">, S<"n 85-0000002">.
=head3 C<info_uri>
Returns the info: URI for an LCCN. For example, the URI for LCCN
S<"n 85-000002 "> is S<"info:lccn/n85000002">.
=head3 C<original>
Returns the original representation of the LCCN, as passed to C<new>.
=head3 C<permalink>
Returns the Library of Congress permalink URL for an LCCN. For
example, the permalink URL for LCCN S<"n 85-000002 "> is
S<"http://lccn.loc.gov/n85000002">.
=head2 Operator overloading
=head3 C<"">
In string context, Business::LCCN objects stringify as the
canonical representation of the LCCN.
=head3 C<eq>, C<==>
Business::LCCN objects can be compared to other Business::LCCN
objects or LCCN strings.
=head1 SEE ALSO
L<Business::ISBN>, L<http://www.loc.gov/marc/lccn_structure.html>,
L<http://lccn.loc.gov/>,
L<http://www.loc.gov/standards/uri/info.html>,
L<http://en.wikipedia.org/wiki/Library_of_Congress_Control_Number>
=head1 DIAGNOSTICS
Running C<new> on invalid input may generate warnings, unless the
C<no_warnings> option is set.
=head1 AUTHOR
Anirvan Chatterjee, C<< <anirvan at cpan.org> >>
=head1 BUGS
Please report any bugs or feature requests to
C<bug-business-lccn at rt.cpan.org>, or through the web interface at
L<http://rt.cpan.org/NoAuth/ReportBug.html?Queue=Business-LCCN>. I
will be notified, and then you'll automatically be notified of
progress on your bug as I make changes.
=head1 SUPPORT
You can find documentation for this module with the perldoc command.
perldoc Business::LCCN
You can also look for information at:
=over 4
=item * RT: CPAN's request tracker
L<http://rt.cpan.org/NoAuth/Bugs.html?Dist=Business-LCCN>
=item * AnnoCPAN: Annotated CPAN documentation
L<http://annocpan.org/dist/Business-LCCN>
=item * CPAN Ratings
L<http://cpanratings.perl.org/d/Business-LCCN>
=item * Search CPAN
L<http://search.cpan.org/dist/Business-LCCN>
=back
=head1 COPYRIGHT & LICENSE
Copyright 2008 Anirvan Chatterjee, all rights reserved.
This program is free software; you can redistribute it and/or modify it
under the same terms as Perl itself.
=cut
1; # End of Business::LCCN
# Local Variables:
# mode: perltidy
# End: