The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
#------------------------------------------------------------------------------
# File:         VCard.pm
#
# Description:  Read vCard and iCalendar meta information
#
# Revisions:    2015/04/05 - P. Harvey Created
#               2015/05/02 - PH Added iCalendar support
#
# References:   1) http://en.m.wikipedia.org/wiki/VCard
#               2) http://tools.ietf.org/html/rfc6350
#               3) http://tools.ietf.org/html/rfc5545
#------------------------------------------------------------------------------

package Image::ExifTool::VCard;

use strict;
use vars qw($VERSION);
use Image::ExifTool qw(:DataAccess :Utils);

$VERSION = '1.04';

my %unescapeVCard = ( '\\'=>'\\', ','=>',', 'n'=>"\n", 'N'=>"\n" );

# lookup for iCalendar components (used to generate family 1 group names if top level)
my %isComponent = ( Event=>1, Todo=>1, Journal=>1, Freebusy=>1, Timezone=>1, Alarm=>1 );

my %timeInfo = (
    # convert common date/time formats to EXIF style
    ValueConv => q{
        $val =~ s/(\d{4})(\d{2})(\d{2})T(\d{2})(\d{2})(\d{2})(Z?)/$1:$2:$3 $4:$5:$6$7/g;
        $val =~ s/(\d{4})(\d{2})(\d{2})/$1:$2:$3/g;
        $val =~ s/(\d{4})-(\d{2})-(\d{2})/$1:$2:$3/g;
        return $val;
    },
    PrintConv => '$self->ConvertDateTime($val)',
);

# vCard tags (ref 1/2/PH)
# Note: The case of all tag ID's is normalized to lowercase with uppercase first letter
%Image::ExifTool::VCard::Main = (
    GROUPS => { 2 => 'Document' },
    VARS => { NO_LOOKUP => 1 }, # omit tags from lookup
    NOTES => q{
        This table lists common vCard tags, but ExifTool will also extract any other
        vCard tags found.  Tag names may have "Pref" added to indicate the preferred
        instance of a vCard property, and other "TYPE" parameters may also added to
        the tag name.  VCF files may contain multiple vCard entries which are
        distinguished by the ExifTool family 3 group name (document  number). See
        L<http://tools.ietf.org/html/rfc6350> for the vCard 4.0 specification.
    },
    Version     => { Name => 'VCardVersion',   Description => 'VCard Version' },
    Fn          => { Name => 'FormattedName',  Groups => { 2 => 'Author' } },
    N           => { Name => 'Name',           Groups => { 2 => 'Author' } },
    Bday        => { Name => 'Birthday',       Groups => { 2 => 'Time' }, %timeInfo },
    Tz          => { Name => 'TimeZone',       Groups => { 2 => 'Time' } },
    Adr         => { Name => 'Address',        Groups => { 2 => 'Location' } },
    Geo => {
        Name => 'Geolocation',
        Groups => { 2 => 'Location' },
        # when used as a parameter, VCard 4.0 adds a "geo:" prefix that we need to remove
        ValueConv => '$val =~ s/^geo://; $val',
    },
    Anniversary => { },
    Email       => { },
    Gender      => { },
    Impp        => 'IMPP',
    Lang        => 'Language',
    Logo        => { },
    Nickname    => { },
    Note        => { },
    Org         => 'Organization',
    Photo       => { Groups => { 2 => 'Preview' } },
    Prodid      => 'Software',
    Rev         => 'Revision',
    Sound       => { },
    Tel         => 'Telephone',
    Title       => 'JobTitle',
    Uid         => 'UID',
    Url         => 'URL',
    'X-ablabel' => { Name => 'ABLabel', PrintConv => '$val =~ s/^_\$!<(.*)>!\$_$/$1/; $val' },
    'X-abdate'  => { Name => 'ABDate',  Groups => { 2 => 'Time' }, %timeInfo },
    'X-aim'     => 'AIM',
    'X-icq'     => 'ICQ',
    'X-abuid'   => 'AB_UID',
    'X-abrelatednames' => 'ABRelatedNames',
    'X-socialprofile'  => 'SocialProfile',
);

%Image::ExifTool::VCard::VCalendar = (
    GROUPS => { 1 => 'VCalendar', 2 => 'Document' },
    VARS => { NO_LOOKUP => 1 }, # omit tags from lookup
    NOTES => q{
        The VCard module is also used to process iCalendar ICS files since they use
        a format similar to vCard.  The following table lists standard iCalendar
        tags, but any existing tags will be extracted.  Top-level iCalendar
        components (eg. Event, Todo, Timezone, etc.) are used for the family 1 group
        names, and embedded components (eg. Alarm) are added as a prefix to the tag
        name.  See L<http://tools.ietf.org/html/rfc5545> for the official iCalendar
        2.0 specification.
    },
    Version     => { Name => 'VCalendarVersion',   Description => 'VCalendar Version' },
    Calscale    => 'CalendarScale',
    Method      => { },
    Prodid      => 'Software',
    Attach      => 'Attachment',
    Categories  => { },
    Class       => 'Classification',
    Comment     => { },
    Description => { },
    Geo => {
        Name => 'Geolocation',
        Groups => { 2 => 'Location' },
        ValueConv => '$val =~ s/^geo://; $val',
    },
    Location    => { Name => 'Location',            Groups => { 2 => 'Location' } },
    'Percent-complete' => 'PercentComplete',
    Priority    => { },
    Resources   => { },
    Status      => { },
    Summary     => { },
    Completed   => { Name => 'DateTimeCompleted',   Groups => { 2 => 'Time' }, %timeInfo },
    Dtend       => { Name => 'DateTimeEnd',         Groups => { 2 => 'Time' }, %timeInfo },
    Due         => { Name => 'DateTimeDue',         Groups => { 2 => 'Time' }, %timeInfo },
    Dtstart     => { Name => 'DateTimeStart',       Groups => { 2 => 'Time' }, %timeInfo },
    Duration    => { },
    Freebusy    => 'FreeBusyTime',
    Transp      => 'TimeTransparency',
    Tzid        => { Name => 'TimezoneID',          Groups => { 2 => 'Time' } },
    Tzname      => { Name => 'TimezoneName',        Groups => { 2 => 'Time' } },
    Tzoffsetfrom=> { Name => 'TimezoneOffsetFrom',  Groups => { 2 => 'Time' } },
    Tzoffsetto  => { Name => 'TimezoneOffsetTo',    Groups => { 2 => 'Time' } },
    Tzurl       => { Name => 'TimeZoneURL',         Groups => { 2 => 'Time' } },
    Attendee    => { },
    Contact     => { },
    Organizer   => { },
    'Recurrence-id' => 'RecurrenceID',
    'Related-to'    => 'RelatedTo',
    Url         => 'URL',
    Uid         => 'UID',
    Exdate      => { Name => 'ExceptionDateTimes',  Groups => { 2 => 'Time' }, %timeInfo },
    Rdate       => { Name => 'RecurrenceDateTimes', Groups => { 2 => 'Time' }, %timeInfo },
    Rrule       => { Name => 'RecurrenceRule',      Groups => { 2 => 'Time' } },
    Action      => { },
    Repeat      => { },
    Trigger     => { },
    Created     => { Name => 'DateCreated',         Groups => { 2 => 'Time' }, %timeInfo },
    Dtstamp     => { Name => 'DateTimeStamp',       Groups => { 2 => 'Time' }, %timeInfo },
    'Last-modified' => { Name => 'ModifyDate',      Groups => { 2 => 'Time' }, %timeInfo },
    Sequence    => 'SequenceNumber',
    'Request-status' => 'RequestStatus',
    Acknowledged=> { Name => 'Acknowledged',        Groups => { 2 => 'Time' }, %timeInfo },
);

#------------------------------------------------------------------------------
# Get vCard tag, creating if necessary
# Inputs: 0) ExifTool ref, 1) tag table ref, 2) tag ID, 3) tag Name,
#         4) source tagInfo ref, 5) lang code
# Returns: tagInfo ref
sub GetVCardTag($$$$;$$)
{
    my ($et, $tagTablePtr, $tag, $name, $srcInfo, $langCode) = @_;
    my $tagInfo = $$tagTablePtr{$tag};
    unless ($tagInfo) {
        if ($srcInfo) {
            $tagInfo = { %$srcInfo };
        } else {
            $tagInfo = { };
            $et->VPrint(0, $$et{INDENT}, "[adding $tag]\n");
        }
        $$tagInfo{Name} = $name;
        delete $$tagInfo{Description};  # create new description
        AddTagToTable($tagTablePtr, $tag, $tagInfo);
    }
    # handle alternate languages (the "language" parameter)
    $tagInfo = Image::ExifTool::GetLangInfo($tagInfo, $langCode) if $langCode;
    return $tagInfo;
}

#------------------------------------------------------------------------------
# Decode vCard text
# Inputs: 0) ExifTool ref, 1) vCard text, 2) encoding
# Returns: decoded text (or array ref for a list of values)
sub DecodeVCardText($$;$)
{
    my ($et, $val, $enc) = @_;
    $enc = defined($enc) ? lc $enc : '';
    if ($enc eq 'b' or $enc eq 'base64') {
        require Image::ExifTool::XMP;
        $val = Image::ExifTool::XMP::DecodeBase64($val);
    } else {
        if ($enc eq 'quoted-printable') {
            # convert "=HH" hex codes to characters
            $val =~ s/=([0-9a-f]{2})/chr(hex($1))/ige;
        }
        $val = $et->Decode($val, 'UTF8');   # convert from UTF-8
        # split into separate items if it contains an unescaped comma
        my $list = $val =~ s/(^|[^\\])((\\\\)*),/$1$2\0/g;
        # unescape necessary characters in value
        $val =~ s/\\(.)/$unescapeVCard{$1}||$1/sge;
        if ($list) {
            my @vals = split /\0/, $val;
            $val = \@vals;
        }
    }
    return $val;
}

#------------------------------------------------------------------------------
# Read information in a vCard file
# Inputs: 0) ExifTool ref, 1) dirInfo ref
# Returns: 1 on success, 0 if this wasn't a valid vCard file
sub ProcessVCard($$)
{
    local $_;
    my ($et, $dirInfo) = @_;
    my $raf = $$dirInfo{RAF};
    my ($buff, $val, $ok, $component, %compNum, @count);

    return 0 unless $raf->Read($buff, 24) and $raf->Seek(0,0) and $buff=~/^BEGIN:(VCARD|VCALENDAR)\r\n/i;
    my ($type, $lbl, $tbl, $ext) = uc($1) eq 'VCARD' ? qw(VCard vCard Main VCF) : qw(ICS iCalendar VCalendar ICS);
    $et->SetFileType($type, undef, $ext);
    return 1 if $$et{OPTIONS}{FastScan} and $$et{OPTIONS}{FastScan} == 3;
    local $/ = "\r\n";
    my $tagTablePtr = GetTagTable("Image::ExifTool::VCard::$tbl");
    my $more = $raf->ReadLine($buff);   # read first line
    chomp $buff if $more;
    while ($more) {
        # retrieve previous line from $buff
        $val = $buff if defined $buff;
        # read ahead to next line to see if is a continuation
        $more = $raf->ReadLine($buff);
        if ($more) {
            chomp $buff;
            # add continuation line if necessary
            $buff =~ s/^[ \t]// and $val .= $buff, undef($buff), next;
        }
        if ($val =~ /^(BEGIN|END):(V?)(\w+)$/i) {
            my ($begin, $v, $what) = ((lc($1) eq 'begin' ? 1 : 0), $2, ucfirst lc $3);
            if ($what eq 'Card' or $what eq 'Calendar') {
                if ($begin) {
                    @count = ( { } );   # reset group counters
                } else {
                    $ok = 1;    # ok if we read at least on full VCARD or VCALENDAR
                }
                next;
            }
            # absorb top-level component into family 1 group name
            if ($isComponent{$what}) {
                if ($begin) {
                    unless ($component) {
                        # begin a new top-level component
                        @count = ( { } );
                        $component = $what;
                        $compNum{$component} = ($compNum{$component} || 0) + 1;
                        next;
                    }
                } elsif ($component and $component eq $what) {
                    # this top-level component has ended
                    undef $component;
                    next;
                }
            }
            # keep count of each component at this level
            if ($begin) {
                $count[-1]{$what} = ($count[-1]{$what} || 0) + 1 if $v;
                push @count, { obj => $what };
            } elsif (@count > 1) {
                pop @count;
            }
            next;
        } elsif ($ok) {
            $ok = 0;
            $$et{DOC_NUM} = ++$$et{DOC_COUNT};  # read next card as a new document
        }
        unless ($val =~ s/^([-A-Za-z0-9.]+)//) {
            $et->WarnOnce("Unrecognized line in $lbl file");
            next;
        }
        my $tag = $1;
        # set group if it exists
        if ($tag =~ s/^([-A-Za-z0-9]+)\.//) {
            $$et{SET_GROUP1} = ucfirst lc $1;
        } elsif ($component) {
            $$et{SET_GROUP1} = $component . $compNum{$component};
        } else {
            delete $$et{SET_GROUP1};
        }
        my ($name, %param, $p, @val);
        # vCard tag ID's are case-insensitive, so normalize to lowercase with
        # an uppercase first letter for use as a tag name
        $name = ucfirst $tag if $tag =~ /[a-z]/;    # preserve mixed case in name if it exists
        $tag = ucfirst lc $tag;
        # get source tagInfo reference
        my $srcInfo = $et->GetTagInfo($tagTablePtr, $tag);
        if ($srcInfo) {
            $name = $$srcInfo{Name};    # use our name
        } else {
            $name or $name = $tag;
            # remove leading "X-" from name if it exists
            $name =~ s/^X-// and $name = ucfirst $name;
        }
        # add object name(s) to tag if necessary
        if (@count > 1) {
            my $i;
            for ($i=$#count-1; $i>=0; --$i) {
                my $pre = $count[$i-1]{obj};    # use containing object name as tag prefix
                my $c = $count[$i]{$pre};       # add index for object number
                $c = '' unless defined $c;
                $tag = $pre . $c . $tag;
                $name = $pre . $c . $name;
            }
        }
        # parse parameters
        while ($val =~ s/^;([-A-Za-z0-9]*)(=?)//) {
            $p = ucfirst lc $1;
            # convert old vCard 2.x parameters to the new "TYPE=" format
            $2 or $val = $1 . $val, $p = 'Type';
            # read parameter value
            for (;;) {
                last unless $val =~ s/^"([^"]*)",?// or $val =~ s/^([^";:,]+,?)//;
                my $v = $p eq 'Type' ? ucfirst lc $1 : $1;
                $param{$p} = defined($param{$p}) ? $param{$p} . $v : $v;
            }
            if (defined $param{$p}) {
                $param{$p} =~ s/\\(.)/$unescapeVCard{$1}||$1/sge;
            } else {
                $param{$p} = '';
            }
        }
        $val =~ s/^:// or $et->WarnOnce("Invalid line in $lbl file"), next;
        # add 'Type' parameter to id and name if it exists
        $param{Type} and $tag .= $param{Type}, $name .= $param{Type};
        # convert base64-encoded data
        if ($val =~ s{^data:(\w+)/(\w+);base64,}{}) {
            my $xtra = ucfirst(lc $1) . ucfirst(lc $2);
            $tag .= $xtra;
            $name .= $xtra;
            $param{Encoding} = 'base64';
        }
        $val = DecodeVCardText($et, $val, $param{Encoding});
        my $tagInfo = GetVCardTag($et, $tagTablePtr, $tag, $name, $srcInfo, $param{Language});
        $et->HandleTag($tagTablePtr, $tag, $val, TagInfo => $tagInfo);
        # handle some other parameters that we care about (ignore the rest for now)
        foreach $p (qw(Geo Label Tzid)) {
            next unless defined $param{$p};
            # use tag attributes from our table if it exists
            my $srcTag2 = $et->GetTagInfo($tagTablePtr, $p);
            my $pn = $srcTag2 ? $$srcTag2{Name} : $p;
            $val = DecodeVCardText($et, $param{$p});
            # add parameter to tag ID and name
            my ($tg, $nm) = ($tag . $p, $name . $pn);
            $tagInfo = GetVCardTag($et, $tagTablePtr, $tg, $nm, $srcTag2, $param{Language});
            $et->HandleTag($tagTablePtr, $tg, $val, TagInfo => $tagInfo);
        }
    }
    delete $$et{SET_GROUP1};
    delete $$et{DOC_NUM};
    $ok or $et->Warn("Missing $lbl end");
    return 1;
}

1;  # end

__END__

=head1 NAME

Image::ExifTool::VCard - Read vCard and iCalendar meta information

=head1 SYNOPSIS

This module is used by Image::ExifTool

=head1 DESCRIPTION

This module contains definitions required by Image::ExifTool to read meta
information from vCard VCF and iCalendar ICS files.

=head1 AUTHOR

Copyright 2003-2018, Phil Harvey (phil at owl.phy.queensu.ca)

This library is free software; you can redistribute it and/or modify it
under the same terms as Perl itself.

=head1 REFERENCES

=over 4

=item L<http://en.m.wikipedia.org/wiki/VCard>

=item L<http://tools.ietf.org/html/rfc6350>

=item L<http://tools.ietf.org/html/rfc5545>

=back

=head1 SEE ALSO

L<Image::ExifTool::TagNames/VCard Tags>,
L<Image::ExifTool::TagNames/VCard VCalendar Tags>,
L<Image::ExifTool(3pm)|Image::ExifTool>

=cut