# Copyrights 2006-2015 by [Mark Overmeer].
# For other contributors see ChangeLog.
# See the manual pages for details on the licensing terms.
# Pod stripped from pm file by OODoc 2.01.
use warnings;
use strict;
no warnings 'recursion';
package XML::Compile::Schema::BuiltInTypes;
use vars '$VERSION';
$VERSION = '1.51';
use base 'Exporter';
our @EXPORT = qw/%builtin_types builtin_type_info/;
our %builtin_types;
use Log::Report 'xml-compile', syntax => 'SHORT';
use POSIX qw/strftime/;
use Math::BigInt;
use Math::BigFloat;
use MIME::Base64;
use XML::Compile::Util qw/pack_type unpack_type/;
use POSIX qw/floor log10/;
use Config '%Config';
my $iv_bits = $Config{ivsize} * 8 -1;
my $iv_digits = floor($iv_bits * log10(2));
my $fits_iv = qr/^[+-]?[0-9]{1,$iv_digits}$/;
sub builtin_type_info($) { $builtin_types{$_[0]} }
# The XML reader calls
# check(parse(value)) or check_read(parse(value))
# The XML writer calls
# check(format(value)) or check_write(format(value))
# Parse has a second argument, only for QNAME: the node
# Format has a second argument for QNAME as well.
sub identity { $_[0] }
# already validated, unless that is disabled.
sub str2int { $_[0] + 0 }
# sprintf returns '0' if non-int, with warning. We need a validation error
sub int2str { $_[0] =~ m/^\s*[0-9]+\s*$/ ? sprintf("%ld", $_[0]) : $_[0] }
sub str { "$_[0]" }
sub _replace { $_[0] =~ s/[\t\r\n]/ /g; $_[0]}
sub _collapse { local $_ = $_[0]; s/[\t\r\n]+/ /g; s/^ +//; s/ +$//; $_}
# format not useful, because xsi:type not supported
$builtin_types{anySimpleType} =
{ example => 'anySimple'
, parse => sub {shift}
, extends => 'anyType'
};
$builtin_types{anyType} =
{ example => 'anything'
, parse => sub {shift}
, extends => undef # the root type
};
$builtin_types{anyAtomicType} =
{ example => 'anyAtomic'
, parse => sub {shift}
, extends => 'anySimpleType'
};
$builtin_types{error} = {example => '[some error structure]'};
$builtin_types{boolean} =
{ parse => sub { $_[0] =~ m/^\s*false|0\s*/i ? 0 : 1 }
, format => sub { $_[0] eq 'false' || $_[0] eq 'true' ? $_[0]
: $_[0] ? 1 : 0 }
, check => sub { $_[0] =~ m/^\s*(?:false|true|0|1)\s*$/i }
, example => 'true'
, extends => 'anyAtomicType'
};
$builtin_types{pattern} =
{ example => '*.exe'
};
sub bigint
{ my $v = shift;
$v =~ s/\s+//g;
return $v if $v =~ $fits_iv;
my $big = Math::BigInt->new($v);
error __x"Value `{val}' is not a (big) integer", val => $big
if $big->is_nan;
$big;
}
$builtin_types{integer} =
{ parse => \&bigint
, check => sub { $_[0] =~ m/^\s*[-+]?\s*[0-9][\s0-9]*$/ }
, example => 42
, extends => 'decimal'
};
$builtin_types{negativeInteger} =
{ parse => \&bigint
, check => sub { $_[0] =~ m/^\s*\-\s*[0-9][\s0-9]*$/ }
, example => '-1'
, extends => 'nonPositiveInteger'
};
$builtin_types{nonNegativeInteger} =
{ parse => \&bigint
, check => sub { $_[0] =~ m/^\s*(?:\+\s*)?[0-9][\s0-9]*$/ }
, example => '17'
, extends => 'integer'
};
$builtin_types{positiveInteger} =
{ parse => \&bigint
, check => sub { $_[0] =~ m/^\s*(?:\+\s*)?[0-9][\s0-9]*$/ && $_[0] =~ m/[1-9]/ }
, example => '+3'
, extends => 'nonNegativeInteger'
};
$builtin_types{nonPositiveInteger} =
{ parse => \&bigint
, check => sub { $_[0] =~ m/^\s*(?:\-\s*)?[0-9][\s0-9]*$/
|| $_[0] =~ m/^\s*(?:\+\s*)0[0\s]*$/ }
, example => '-42'
, extends => 'integer'
};
$builtin_types{long} =
{ parse => \&bigint
, check =>
sub { $_[0] =~ m/^\s*[-+]?\s*[0-9][\s0-9]*$/ && ($_[0] =~ tr/0-9//) < 20 }
, example => '-100'
, extends => 'integer'
};
$builtin_types{unsignedLong} =
{ parse => \&bigint
, check => sub {$_[0] =~ m/^\s*\+?\s*[0-9][\s0-9]*$/ && ($_[0] =~ tr/0-9//) < 21}
, example => '100'
, extends => 'nonNegativeInteger'
};
$builtin_types{unsignedInt} =
{ parse => \&bigint
, check => sub {$_[0] =~ m/^\s*\+?\s*[0-9][\s0-9]*$/ && ($_[0] =~ tr/0-9//) <=10}
, example => '42'
, extends => 'unsignedLong'
};
# Used when 'sloppy_integers' was set: the size of the values
# is illegally limited to the size of Perl's 32-bit signed integers.
$builtin_types{non_pos_int} =
{ parse => \&str2int
, format => \&int2str
, check => sub {$_[0] =~ m/^\s*[+-]?\s*[0-9][0-9\s]*$/ && $_[0] <= 0}
, example => '-12'
};
$builtin_types{positive_int} =
{ parse => \&str2int
, format => \&int2str
, check => sub {$_[0] =~ m/^\s*(?:\+\s*)?[0-9][0-9\s]*$/ }
, example => '+42'
};
$builtin_types{negative_int} =
{ parse => \&str2int
, format => \&int2str
, check => sub {$_[0] =~ m/^\s*\-\s*[0-9][0-9\s]*$/ }
, example => '-12'
};
$builtin_types{unsigned_int} =
{ parse => \&str2int
, format => \&int2str
, check => sub {$_[0] =~ m/^\s*(?:\+\s*)?[0-9][0-9\s]*$/ && $_[0] >= 0}
, example => '42'
};
$builtin_types{int} =
{ parse => \&str2int
, format => \&int2str
, check => sub {$_[0] =~ m/^\s*[+-]?[0-9]+\s*$/}
, example => '42'
, extends => 'long'
};
$builtin_types{short} =
{ parse => \&str2int
, format => \&int2str
, check =>
sub { $_[0] =~ m/^\s*[+-]?[0-9]+\s*$/ && $_[0] >= -32768 && $_[0] <= 32767 }
, example => '-7'
, extends => 'int'
};
$builtin_types{unsignedShort} =
{ parse => \&str2int
, format => \&int2str
, check =>
sub { $_[0] =~ m/^\s*[+-]?[0-9]+\s*$/ && $_[0] >= 0 && $_[0] <= 65535 }
, example => '7'
, extends => 'unsignedInt'
};
$builtin_types{byte} =
{ parse => \&str2int
, format => \&int2str
, check => sub {$_[0] =~ m/^\s*[+-]?[0-9]+\s*$/ && $_[0] >= -128 && $_[0] <=127}
, example => '-2'
, extends => 'short'
};
$builtin_types{unsignedByte} =
{ parse => \&str2int
, format => \&int2str
, check => sub {$_[0] =~ m/^\s*[+-]?[0-9]+\s*$/ && $_[0] >= 0 && $_[0] <= 255}
, example => '2'
, extends => 'unsignedShort'
};
$builtin_types{decimal} =
{ parse => sub {$_[0] =~ s/\s+//g; Math::BigFloat->new($_[0]) }
, check => sub {$_[0] =~ m/^(\+|\-)?([0-9]+(\.[0-9]*)?|\.[0-9]+)$/}
, example => '3.1415'
, extends => 'anyAtomicType'
};
sub str2num
{ my $s = shift;
$s =~ s/\s//g;
$s =~ m/[^0-9]/ ? Math::BigFloat->new($s eq 'NaN' ? $s : lc $s) # INF->inf
: length $s < 9 ? $s+0
: Math::BigInt->new($s);
}
sub num2str
{ my $f = shift;
!ref $f ? $f
: !(UNIVERSAL::isa($f,'Math::BigInt') || UNIVERSAL::isa($f,'Math::BigFloat'))
? eval {use warnings FATAL => 'all'; $f + 0.0}
: $f->is_nan ? 'NaN'
: uc $f->bstr; # [+-]inf -> [+-]INF, e->E doesn't matter
}
sub numcheck($)
{ $_[0] =~
m# [+-]? (?: [0-9]+(?:\.[0-9]*)?|\.[0-9]+) (?:[Ee][+-]?[0-9]+)?
| [+-]? INF
| NaN #x
}
$builtin_types{precissionDecimal} =
$builtin_types{float} =
$builtin_types{double} =
{ parse => \&str2num
, format => \&num2str
, check => \&numcheck
, example => '3.1415'
, extends => 'anyAtomicType'
};
$builtin_types{sloppy_float} =
{ parse => sub { $_[0] }
, check => sub {
my $v = eval {use warnings FATAL => 'all'; $_[0] + 0.0};
$@ ? undef : 1;
}
, example => '3.1415'
, extends => 'anyAtomicType'
};
$builtin_types{base64Binary} =
{ parse => sub { eval { decode_base64 $_[0] } }
, format => sub {
my $a = $_[0];
eval { utf8::downgrade($a) };
if($@)
{ error __x"use Encode::encode() for base64Binary field at {path}"
, path => $_[2];
}
encode_base64 $a, '';
}
, check => sub { !$@ }
, example => 'decoded bytes'
, extends => 'anyAtomicType'
};
# (Use of) an XS implementation would be nice
$builtin_types{hexBinary} =
{ parse => sub { $_[0] =~ s/\s+//g; pack 'H*', $_[0]}
, format => sub { uc unpack 'H*', $_[0]}
, check =>
sub { $_[0] !~ m/[^0-9a-fA-F\s]/ && (($_[0] =~ tr/0-9a-fA-F//) %2)==0}
, example => 'F00F'
, extends => 'anyAtomicType'
};
my $yearFrag = qr/ \-? (?: [1-9][0-9]{3,} | 0[0-9][0-9][0-9] ) /x;
my $monthFrag = qr/ 0[1-9] | 1[0-2] /x;
my $dayFrag = qr/ 0[1-9] | [12][0-9] | 3[01] /x;
my $hourFrag = qr/ [01][0-9] | 2[0-3] /x;
my $minuteFrag = qr/ [0-5][0-9] /x;
my $secondFrag = qr/ [0-5][0-9] (?: \.[0-9]+)? /x;
my $endOfDayFrag = qr/24\:00\:00 (?: \.[0-9]+)? /x;
my $timezoneFrag = qr/Z | [+-] (?: 0[0-9] | 1[0-4] ) \: $minuteFrag/x;
my $timeFrag = qr/ (?: $hourFrag \: $minuteFrag \: $secondFrag )
| $endOfDayFrag
/x;
my $date = qr/^ $yearFrag \- $monthFrag \- $dayFrag $timezoneFrag? $/x;
$builtin_types{date} =
{ parse => \&_collapse
, format => sub { $_[0] =~ /^[0-9]+$/ ? strftime("%Y-%m-%d", gmtime $_[0]) : $_[0]}
, check => sub { (my $val = $_[0]) =~ s/\s+//g; $val =~ $date }
, example => '2006-10-06'
, extends => 'anyAtomicType'
};
my $time = qr /^ $timeFrag $timezoneFrag? $/x;
$builtin_types{time} =
{ parse => \&_collapse
, format => sub { return $_[0] if $_[0] =~ /[^0-9.]/;
my $subsec = $_[0] =~ /(\.[0-9]+)/ ? $1 : '';
strftime "%T$subsec", gmtime $_[0] }
, check => sub { (my $val = $_[0]) =~ s/\s+//g; $val =~ $time }
, example => '11:12:13'
, extends => 'anyAtomicType'
};
my $dateTime
= qr/^ $yearFrag \- $monthFrag \- $dayFrag T $timeFrag $timezoneFrag? $/x;
my $dateTimeStamp
= qr/^ $yearFrag \- $monthFrag \- $dayFrag T $timeFrag $timezoneFrag $/x;
sub _dt_format
{ return $_[0] if $_[0] =~ /[^0-9.]/; # already formated
my $subsec = $_[0] =~ /(\.[0-9]+)/ ? $1 : '';
strftime "%Y-%m-%dT%H:%M:%S${subsec}Z", gmtime $_[0];
}
$builtin_types{dateTime} =
{ parse => \&_collapse
, format => \&_dt_format
, check => sub { (my $val = $_[0]) =~ s/\s+//g; $val =~ $dateTime }
, example => '2006-10-06T00:23:02Z'
, extends => 'anyAtomicType'
};
$builtin_types{dateTimeStamp} =
{ parse => \&_collapse
, format => \&_dt_format
, check => sub { (my $val = $_[0]) =~ s/\s+//g; $val =~ $dateTimeStamp }
, example => '2006-10-06T00:23:02Z'
, extends => 'dateTime'
};
my $gDay = qr/^ \- \- \- $dayFrag $timezoneFrag? $/x;
$builtin_types{gDay} =
{ parse => \&_collapse
, check => sub { (my $val = $_[0]) =~ s/\s+//g; $val =~ $gDay }
, example => '---12+09:00'
, extends => 'anyAtomicType'
};
my $gMonth = qr/^ \- \- $monthFrag $timezoneFrag? $/x;
$builtin_types{gMonth} =
{ parse => \&_collapse
, check => sub { (my $val = $_[0]) =~ s/\s+//g; $val =~ $gMonth }
, example => '--09+07:00'
, extends => 'anyAtomicType'
};
my $gMonthDay = qr/^ \- \- $monthFrag \- $dayFrag $timezoneFrag? /x;
$builtin_types{gMonthDay} =
{ parse => \&_collapse
, check => sub { (my $val = $_[0]) =~ s/\s+//g; $val =~ $gMonthDay }
, example => '--09-12+07:00'
, extends => 'anyAtomicType'
};
my $gYear = qr/^ $yearFrag $timezoneFrag? $/x;
$builtin_types{gYear} =
{ parse => \&_collapse
, check => sub { (my $val = $_[0]) =~ s/\s+//g; $val =~ $gYear }
, example => '2006+07:00'
, extends => 'anyAtomicType'
};
my $gYearMonth = qr/^ $yearFrag \- $monthFrag $timezoneFrag? $/x;
$builtin_types{gYearMonth} =
{ parse => \&_collapse
, check => sub { (my $val = $_[0]) =~ s/\s+//g; $val =~ $gYearMonth }
, example => '2006-11+07:00'
, extends => 'anyAtomicType'
};
$builtin_types{duration} =
{ parse => \&_collapse
, check => sub { my $val = $_[0]; $val =~ s/\s+//g;
$val =~ m/^\-?P(?:[0-9]+Y)?(?:[0-9]+M)?(?:[0-9]+D)?
(?:T(?:[0-9]+H)?(?:[0-9]+M)?(?:[0-9]+(?:\.[0-9]+)?S)?)?$/x }
, example => 'P9M2DT3H5M'
};
$builtin_types{dayTimeDuration} =
{ parse => \&_collapse
, check => sub { my $val = $_[0]; $val =~ s/\s+//g; $val =~
m/^\-?P(?:[0-9]+D)?(?:T(?:[0-9]+H)?(?:[0-9]+M)?(?:[0-9]+(?:\.[0-9]+)?S)?)?$/ }
, example => 'P2DT3H5M10S'
, extends => 'duration'
};
$builtin_types{yearMonthDuration} =
{ parse => \&_collapse
, check => sub { my $val = $_[0]; $val =~ s/\s+//g; $val =~
m/^\-?P(?:[0-9]+Y)?(?:[0-9]+M)?$/ }
, example => 'P40Y5M'
, extends => 'duration'
};
$builtin_types{string} =
{ example => 'example'
, extends => 'anyAtomicType'
};
$builtin_types{normalizedString} =
{ parse => \&_replace
, example => 'example'
, extends => 'string'
};
$builtin_types{language} =
{ parse => \&_collapse
, check => sub { my $v = $_[0]; $v =~ s/\s+//g; $v =~
m/^[a-zA-Z]{1,8}(?:\-[a-zA-Z0-9]{1,8})*$/ }
, example => 'nl-NL'
, extends => 'token'
};
# NCName matches pattern [\i-[:]][\c-[:]]*
sub _ncname($)
{ (my $name = $_[0]) =~ s/\s//;
$name =~ m/^[a-zA-Z_](?:[\w.-]*)$/;
}
my $ids = 0;
$builtin_types{ID} =
{ parse => \&_collapse
, check => \&_ncname
, example => 'id_'.$ids++
, extends => 'NCName'
};
$builtin_types{IDREF} =
{ parse => \&_collapse
, check => \&_ncname
, example => 'id-ref'
, extends => 'NCName'
};
$builtin_types{NCName} =
{ parse => \&_collapse
, check => \&_ncname
, example => 'label'
, extends => 'Name'
};
$builtin_types{ENTITY} =
{ parse => \&_collapse
, check => \&_ncname
, example => 'entity'
, extends => 'NCName'
};
$builtin_types{IDREFS} =
$builtin_types{ENTITIES} =
{ parse => sub { [ split ' ', shift ] }
, format => sub { my $v = shift; ref $v eq 'ARRAY' ? join(' ',@$v) : $v }
, check => sub { $_[0] !~ m/\:/ }
, example => 'labels'
, is_list => 1
, extends => 'anySimpleType'
};
$builtin_types{Name} =
{ parse => \&_collapse
, example => 'name'
, extends => 'token'
};
$builtin_types{token} =
{ parse => \&_collapse
, example => 'token'
, extends => 'normalizedString'
};
# check required! \c
$builtin_types{NMTOKEN} =
{ parse => sub { $_[0] =~ s/\s+//g; $_[0] }
, example => 'nmtoken'
, extends => 'token'
};
$builtin_types{NMTOKENS} =
{ parse => sub { [ split ' ', shift ] }
, format => sub { my $v = shift; ref $v eq 'ARRAY' ? join(' ',@$v) : $v }
, example => 'nmtokens'
, is_list => 1
, extends => 'anySimpleType'
};
# relative uri's are also correct, so even empty strings... it
# cannot be checked without context.
# use Regexp::Common qw/URI/;
# check => sub { $_[0] =~ $RE{URI} }
$builtin_types{anyURI} =
{ parse => \&_collapse
, example => 'http://example.com'
, extends => 'anyAtomicType'
};
$builtin_types{QName} =
{ parse =>
sub { my ($qname, $node) = @_;
$qname =~ s/\s//g;
my $prefix = $qname =~ s/^([^:]*)\:// ? $1 : '';
$node = $node->node if $node->isa('XML::Compile::Iterator');
my $ns = $node->lookupNamespaceURI($prefix) || '';
pack_type $ns, $qname;
}
, format =>
sub { my ($type, $trans) = @_;
my ($ns, $local) = unpack_type $type;
length $ns or return $local;
my $def = $trans->{$ns};
# let's hope that the namespace will get used somewhere else as
# well, to make it into the xmlns.
defined $def && exists $def->{used}
or error __x"QName formatting only works if the namespace is used for an element, not found {ns} for {local}", ns => $ns, local => $local;
length $def->{prefix} ? "$def->{prefix}:$local" : $local;
}
, example => 'myns:local'
, extends => 'anyAtomicType'
};
$builtin_types{NOTATION} =
{
extends => 'anyAtomicType'
};
$builtin_types{binary} = { example => 'binary string' };
$builtin_types{timeDuration} = $builtin_types{duration};
$builtin_types{uriReference} = $builtin_types{anyURI};
# These constants where removed from the spec in 2001. Probably
# no-one is using these (anymore)
# century = period => 'P100Y'
# recurringDate = duration => 'P24H', period => 'P1Y'
# recurringDay = duration => 'P24H', period => 'P1M'
# timeInstant = duration => 'P0Y', period => 'P0Y'
# timePeriod = duration => 'P0Y'
# year = period => 'P1Y'
# recurringDuration = ??
# only in 2000/10 schemas
$builtin_types{CDATA} =
{ parse => \&_replace
, example => 'CDATA'
};
1;