The London Perl and Raku Workshop takes place on 26th Oct 2024. If your company depends on Perl, please consider sponsoring and/or attending.
package RDF::vCard::Importer;

use 5.008;
use strict;
use warnings;
no warnings qw(uninitialized);

use Encode qw[];
use RDF::TrineX::Functions -shortcuts;
use RDF::vCard::Entity;
use RDF::vCard::Line;
use Text::vFile::asData;

use namespace::clean;

our $VERSION = '0.012';

sub new
{
	my ($class, %options) = @_;
	my $self  = bless { %options }, $class;
	$self->init unless $self->model;
	return $self;
}

sub init
{
	my ($self, $model, %opts) = @_;
	$self->{model} = rdf_parse($model, %opts);
	return $self;
}

sub model
{
	my ($self) = @_;
	return $self->{model};
}

*TO_RDF = \&model;

sub _ua
{
	my ($self) = @_;
	$self->{ua} ||= LWP::UserAgent->new(agent => sprintf('%s/%s', __PACKAGE__, $VERSION));
	return $self->{ua};
}

sub import_file
{
	my ($self, $file, %options) = @_;
	open my $fh, "<:encoding(UTF-8)", $file;
	my $cards = Text::vFile::asData->new->parse($fh);
	close $fh;
	return $self->_process($cards, %options);
}

sub import_fh
{
	my ($self, $fh, %options) = @_;
	my $cards = Text::vFile::asData->new->parse($fh);
	return $self->_process($cards, %options);
}

sub import_url
{
	my ($self, $url) = @_;
	my $r = $self->_ua->get($url, Accept=>'text/directory;profile=vCard, text/vcard, text/x-vcard, text/directory;q=0.1');
	return unless $r->is_success;
	return $self->import_string($r->decoded_content, lang => $r->content_language);
}

sub import_string
{
	my ($self, $data, %options) = @_;
	my @lines = split /\r?\n/, $data;
	my $cards = Text::vFile::asData->new->parse_lines(@lines);
	return $self->_process($cards, %options);
}

sub _process
{
	my ($self, $cards, %options) = @_;
	
	my @Cards;
	foreach my $c (@{ $cards->{objects} })
	{
		push @Cards, $self->_process_card($c, %options);
	}
	
	return @Cards;
}

sub _process_card
{
	my ($self, $card, %options) = @_;
	my $C = RDF::vCard->new_entity( profile => $card->{type} );
	
	while (my ($prop, $vals) = each %{ $card->{properties} })
	{
		my $group;
		if ($prop =~ /^(.+)\.([^\.]+)$/) # ignore vCard 4.0 grouping construct
		{
			$prop  = $2;
			$group = $1;
		}
		
		foreach my $val (@$vals)
		{
			my $strval = $val->{value};
			
			# I wish Text::vFile::asData did this for me!
			my $structured_value = ($prop =~ /^(ADR|CATEGORIES|GEO|N|ORG)$/i)
				? $self->_extract_structure($strval)
				: RDF::vCard::Line->_unescape_value($strval);
			# Could technically extract structure for all properties,
			# but it's a waste of time, and some of the RDF::vCard::Line
			# code might cope badly.
			
			my $L = RDF::vCard::Line->new(
				property         => uc $prop,
				value            => $structured_value,
				type_parameters  => do {
					                   # force keys to uppercase
				                      my (%tp, $k, $v);
				                      $tp{uc $k} = $v while ($k, $v) = each %{$val->{param}};
											 \%tp;
				                    },
				);
			$L->type_parameters->{TYPE} = [split /,/, $L->type_parameters->{TYPE}]
				if ($L->type_parameters and $L->type_parameters->{TYPE});
			$L->type_parameters->{_GROUP} = $group
				if $group;
			$L->type_parameters->{LANG} ||= $options{lang} if defined $options{lang};
			
			$C->add($L);
		}
	}
	
	$C->add_to_model( $self->model );
	
	return $C;
}

sub _extract_structure
{
	my ($self, $string) = @_;
	my @naive_parts = split /;/, $string;
	my @parts;
	
	while (my $part = shift @naive_parts)
	{
		push @parts, $part;
		while ($parts[-1] =~ /\\$/ and $parts[-1] !~ /\\\\$/ and @naive_parts)
		{
			$parts[-1] =~ s/\\$/;/;
			$parts[-1] .= shift @naive_parts;
		}
	}
	
	my @rv;
	foreach my $part (@parts)
	{
		my @naive_subparts = split /,/, $part;
		my @subparts;

		while (my $subpart = shift @naive_subparts)
		{
			push @subparts, $subpart;
			while ($subparts[-1] =~ /\\$/ and $subparts[-1] !~ /\\\\$/ and @naive_subparts)
			{
				$subparts[-1] =~ s/\\$/,/;
				$subparts[-1] .= shift @naive_subparts;
			}
		}
		
		push @rv, [ map { RDF::vCard::Line->_unescape_value($_) } @subparts ];
	}
	return [@rv];
}

1;

__END__

=head1 NAME

RDF::vCard::Importer - import RDF data from vCard format

=head1 SYNOPSIS

 use RDF::vCard;
 use RDF::TrineShortcuts qw':all';
 
 my $importer = RDF::vCard::Importer->new;
 print $_
	foreach $importer->import_file('contacts.vcf');
 print rdf_string($importer->model => 'RDFXML');

=head1 DESCRIPTION

This module reads vCards and writes RDF.

=head2 Constructor

=over

=item * C<< new(%options) >>

Returns a new RDF::vCard::Importer object and initialises it.

The only valid option currently is B<ua> which can be set to an LWP::UserAgent
for those rare occasions that the Importer needs to fetch stuff from the Web.

=back

=head2 Methods

=over

=item * C<< init >>

Reinitialise the importer. Forgets any cards that have already been imported.

=item * C<< model >>

Return an RDF::Trine::Model containing data for all cards that have been
imported since the importer was last initialised.

=item * C<< import_file($filename, %options) >>

Imports vCard data from a file on the file system.

The data is added to the importer's model (and can be retrieved using the
C<model> method).

This function returns a list of L<RDF::vCard::Entity> objects, so it's
also possible to access the data that way.

There is currently only one supported option: C<lang> which takes an
ISO language code indicating the default language of text within the vCard
data.

=item * C<< import_fh($filehandle, %options) >>

As per C<import_file>, but operates on a file handle.

=item * C<< import_string($string, %options) >>

As per C<import_file>, but operates on vCard data in a string.

=item * C<< import_url($url) >>

As per C<import_file>, but fetches vCard data from a Web address.

Sends an HTTP Accept header of:

  text/directory;profile=vCard,
  text/vcard,
  text/x-vcard,
  text/directory;q=0.1

=back

=begin private

=item TO_RDF

=end private

=head2 vCard Input

vCard 3.0 should be supported fairly completely. Some vCard 4.0 constructs
will also work.

Much of the heavy lifting is performed by L<Text::vFile::asData>, so this
module may be affected by bugs in that distribution.

=head2 RDF Output

Output uses the newer of the 2010 revision of the W3C's vCard vocabulary
L<http://www.w3.org/Submission/vcard-rdf/>. (Note that even though this
was revised in 2010, the term URIs include "2006" in them.)

Some extensions from the namespace L<http://buzzword.org.uk/rdf/vcardx#>
are also output.

The AGENT property is currently omitted from output. This will be added
in a later version.

=head1 SEE ALSO

L<RDF::vCard>.

L<http://www.w3.org/Submission/vcard-rdf/>.

L<http://www.perlrdf.org/>.

=head1 AUTHOR

Toby Inkster E<lt>tobyink@cpan.orgE<gt>.

=head1 COPYRIGHT

Copyright 2011 Toby Inkster

This library is free software; you can redistribute it and/or modify it
under the same terms as Perl itself.