package XML::Validator::Schema::SimpleType;
use strict;
use warnings;
=item NAME
XML::Validator::Schema::SimpleType
=head1 DESCRIPTION
XML Schema simple type system. This module provides objects and class
methods to support simple types. For complex types see the ModelNode
class.
=head1 USAGE
# create a new anonymous type based on an existing type
my $type = $string->derive();
# create a new named type based on an existing type
my $type = $string->derive(name => 'myString');
# add a restriction
$type->restrict(enumeration => "10");
# check a value against a type
($ok, $msg) = $type->check($value);
=cut
use Carp qw(croak);
use XML::Validator::Schema::Util qw(XSD _err);
# facet support bit-patterns
use constant LENGTH => 0b0000000000000001;
use constant MINLENGTH => 0b0000000000000010;
use constant MAXLENGTH => 0b0000000000000100;
use constant PATTERN => 0b0000000000001000;
use constant ENUMERATION => 0b0000000000010000;
use constant WHITESPACE => 0b0000000000100000;
use constant MAXINCLUSIVE => 0b0000000001000000;
use constant MAXEXCLUSIVE => 0b0000000010000000;
use constant MININCLUSIVE => 0b0000000100000000;
use constant MINEXCLUSIVE => 0b0000001000000000;
use constant TOTALDIGITS => 0b0000010000000000;
use constant FRACTIONDIGITS => 0b0000100000000000;
# hash mapping names to values
our %FACET = (length => LENGTH,
minLength => MINLENGTH,
maxLength => MAXLENGTH,
pattern => PATTERN,
enumeration => ENUMERATION,
whiteSpace => WHITESPACE,
maxInclusive => MAXINCLUSIVE,
maxExclusive => MAXEXCLUSIVE,
minInclusive => MININCLUSIVE,
minExclusive => MINEXCLUSIVE,
totalDigits => TOTALDIGITS,
fractionDigits => FRACTIONDIGITS);
# initialize builtin types
our %BUILTIN;
# create the primitive types
$BUILTIN{string} = __PACKAGE__->new(name => 'string',
facets => LENGTH|MINLENGTH|MAXLENGTH|
PATTERN|ENUMERATION|WHITESPACE,
);
$BUILTIN{boolean} = __PACKAGE__->new(name => 'boolean',
facets => PATTERN|WHITESPACE,
);
$BUILTIN{boolean}->restrict(enumeration => "1",
enumeration => "0",
enumeration => "true",
enumeration => "false");
$BUILTIN{decimal} = __PACKAGE__->new(name => 'decimal',
facets => TOTALDIGITS|FRACTIONDIGITS|
PATTERN|WHITESPACE|
#ENUMERATION|
MAXINCLUSIVE|MAXEXCLUSIVE|
MININCLUSIVE|MINEXCLUSIVE,
);
$BUILTIN{decimal}->restrict(pattern => qr/^[+-]?(?:(?:\d+(?:\.\d+)?)|(?:\.\d+))$/);
$BUILTIN{dateTime} = __PACKAGE__->new(name => 'dateTime',
facets => PATTERN|WHITESPACE
#|ENUMERATION|
#MAXINCLUSIVE|MAXEXCLUSIVE|
#MININCLUSIVE|MINEXCLUSIVE,
);
$BUILTIN{dateTime}->restrict(pattern => qr/^[-+]?(\d{4,})-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(?:\.\d+)?(?:(?:Z)|(?:[-+]\d{2}:\d{2}))?$/);
$BUILTIN{float} = __PACKAGE__->new(name => 'float',
facets => PATTERN|WHITESPACE,
#|ENUMERATION|
#MAXINCLUSIVE|MAXEXCLUSIVE|
#MININCLUSIVE|MINEXCLUSIVE);
);
$BUILTIN{float}->restrict(pattern =>
qr/^[+-]?(?:(?:INF)|(?:NaN)|(?:\d+(?:\.\d+)?)(?:[eE][+-]?\d+)?)$/);
$BUILTIN{double} = __PACKAGE__->new(name => 'double',
facets => PATTERN|WHITESPACE,
#|ENUMERATION|
#MAXINCLUSIVE|MAXEXCLUSIVE|
#MININCLUSIVE|MINEXCLUSIVE);
);
$BUILTIN{double}->restrict(pattern =>
qr/^[+-]?(?:(?:INF)|(?:NaN)|(?:\d+(?:\.\d+)?)(?:[eE][+-]?\d+)?)$/);
$BUILTIN{duration} = __PACKAGE__->new(name => 'duration',
facets => PATTERN|WHITESPACE,);
#facets => PATTERN|WHITESPACE|ENUMERATION|MAXINCLUSIVE|MAXEXCLUSIVE|MININCLUSIVE|MINEXCLUSIVE);
# thanks to perlmonk Abigail-II
$BUILTIN{duration}->restrict(pattern => qr /^-? # Optional leading minus.
P # Required.
(?=[T\d]) # Duration cannot be empty.
(?:(?!-) \d+ Y)? # Non-negative integer, Y (optional)
(?:(?!-) \d+ M)? # Non-negative integer, M (optional)
(?:(?!-) \d+ D)? # Non-negative integer, D (optional)
(
(?:T (?=\d) # T, must be followed by a digit.
(?:(?!-) \d+ H)? # Non-negative integer, H (optional)
(?:(?!-) \d+ M)? # Non-negative integer, M (optional)
(?:(?!-) \d+\.\d+ S)? # Non-negative decimal, S (optional)
)? # Entire T part is optional
)$/x);
$BUILTIN{time} = __PACKAGE__->new(name => 'time',
facets => PATTERN|WHITESPACE
#|ENUMERATION|
#MAXINCLUSIVE|MAXEXCLUSIVE|
#MININCLUSIVE|MINEXCLUSIVE,
);
$BUILTIN{time}->restrict(pattern =>
qr /^[0-2]\d:[0-5]\d:[0-5]\d(\.\d+)?(Z?|([+|-]([0-1]\d|2[0-4])\:([0-5]\d))?)$/);
$BUILTIN{date} = __PACKAGE__->new(name => 'date',
facets => PATTERN|WHITESPACE
#|ENUMERATION|
#MAXINCLUSIVE|MAXEXCLUSIVE|
#MININCLUSIVE|MINEXCLUSIVE,
);
$BUILTIN{date}->restrict(pattern =>
qr /^[-]?(\d{4,})-(\d\d)-(\d\d)(??{ _validate_date($1,$2,$3) })(Z?|([+|-]([0-1]\d|2[0-4])\:([0-5]\d))?)$/);
$BUILTIN{gYearMonth} = __PACKAGE__->new(name => 'gYearMonth',
facets => PATTERN|WHITESPACE
#|ENUMERATION|
#MAXINCLUSIVE|MAXEXCLUSIVE|
#MININCLUSIVE|MINEXCLUSIVE,
);
$BUILTIN{gYearMonth}->restrict(pattern =>
qr /^[-]?(\d{4,})-(1[0-2]{1}|0\d{1})(Z?|([+|-]([0-1]\d|2[0-4])\:([0-5]\d))?)$/);
$BUILTIN{gYear} = __PACKAGE__->new(name => 'gYear',
facets => PATTERN|WHITESPACE
#|ENUMERATION|
#MAXINCLUSIVE|MAXEXCLUSIVE|
#MININCLUSIVE|MINEXCLUSIVE,
);
$BUILTIN{gYear}->restrict(pattern =>
qr /^[-]?(\d{4,})(Z?|([+|-]([0-1]\d|2[0-4])\:([0-5]\d))?)$/);
$BUILTIN{gMonthDay} = __PACKAGE__->new(name => 'gMonthDay',
facets => PATTERN|WHITESPACE
#|ENUMERATION|
#MAXINCLUSIVE|MAXEXCLUSIVE|
#MININCLUSIVE|MINEXCLUSIVE,
);
$BUILTIN{gMonthDay}->restrict(pattern =>
qr /^--(\d{2,})-(\d\d)(??{_validate_date(1999,$1,$2)})(Z?|([+|-]([0-1]\d|2[0-4])\:([0-5]\d))?)$/ );
$BUILTIN{gDay} = __PACKAGE__->new(name => 'gDay',
facets => PATTERN|WHITESPACE
#|ENUMERATION|
#MAXINCLUSIVE|MAXEXCLUSIVE|
#MININCLUSIVE|MINEXCLUSIVE,
);
$BUILTIN{gDay}->restrict(pattern =>
qr /^---([0-2]\d{1}|3[0|1])(Z?|([+|-]([0-1]\d|2[0-4])\:([0-5]\d))?)$/ );
$BUILTIN{gMonth} = __PACKAGE__->new(name => 'gMonth',
facets => PATTERN|WHITESPACE
#|ENUMERATION|
#MAXINCLUSIVE|MAXEXCLUSIVE|
#MININCLUSIVE|MINEXCLUSIVE,
);
$BUILTIN{gMonth}->restrict(pattern =>
qr /^--(0\d|1[0-2])(Z?|([+|-]([0-1]\d|2[0-4])\:([0-5]\d))?)$/ );
$BUILTIN{hexBinary} = __PACKAGE__->new(name => 'hexBinary',
facets => PATTERN|WHITESPACE
#|ENUMERATION|
#MAXINCLUSIVE|MAXEXCLUSIVE|
#MININCLUSIVE|MINEXCLUSIVE,
);
$BUILTIN{hexBinary}->restrict(pattern =>
qr /^([0-9a-fA-F][0-9a-fA-F])+$/);
$BUILTIN{base64Binary} = __PACKAGE__->new(name => 'base64Binary',
facets => PATTERN|WHITESPACE
#|ENUMERATION|
#MAXINCLUSIVE|MAXEXCLUSIVE|
#MININCLUSIVE|MINEXCLUSIVE,
);
$BUILTIN{base64Binary}->restrict(pattern =>
qr /^([0-9a-zA-Z\+\\\=][0-9a-zA-Z\+\\\=])+$/);
$BUILTIN{anyURI} = __PACKAGE__->new(name => 'anyURI',
facets => LENGTH|MINLENGTH|MAXLENGTH|
PATTERN|ENUMERATION|WHITESPACE,
);
$BUILTIN{QName} = __PACKAGE__->new(name => 'QName',
facets => PATTERN|WHITESPACE
#|ENUMERATION|
#MAXINCLUSIVE|MAXEXCLUSIVE|
#MININCLUSIVE|MINEXCLUSIVE,
);
$BUILTIN{QName}->restrict(pattern =>
qr /^([A-z][A-z0-9]+:)?([A-z][A-z0-9]+)$/);
$BUILTIN{NOTATION} = __PACKAGE__->new(name => 'NOTATION',
facets => PATTERN|WHITESPACE
#|ENUMERATION|
#MAXINCLUSIVE|MAXEXCLUSIVE|
#MININCLUSIVE|MINEXCLUSIVE,
);
$BUILTIN{NOTATION}->restrict(pattern =>
qr /^([A-z][A-z0-9]+:)?([A-z][A-z0-9]+)$/);
# create derived types
$BUILTIN{integer} = $BUILTIN{decimal}->derive(name => 'integer');
$BUILTIN{integer}->restrict(pattern => qr/^[+-]?\d+$/);
# http://www.w3.org/TR/2000/CR-xmlschema-2-20001024/#nonPositiveInteger
$BUILTIN{nonPositiveInteger} = $BUILTIN{integer}->derive(name => 'nonPositiveInteger');
$BUILTIN{nonPositiveInteger}->restrict( maxInclusive => 0 );
# http://www.w3.org/TR/2000/CR-xmlschema-2-20001024/#nonNegativeInteger
$BUILTIN{nonNegativeInteger} = $BUILTIN{integer}->derive(name => 'nonNegativeInteger');
$BUILTIN{nonNegativeInteger}->restrict( minInclusive => 0 );
# http://www.w3.org/TR/2000/CR-xmlschema-2-20001024/#positiveInteger
$BUILTIN{positiveInteger} = $BUILTIN{nonNegativeInteger}->derive(name => 'positiveInteger');
$BUILTIN{positiveInteger}->restrict( minInclusive => 1 );
# http://www.w3.org/TR/2000/CR-xmlschema-2-20001024/#negativeInteger
$BUILTIN{negativeInteger} = $BUILTIN{nonPositiveInteger}->derive(name => 'negativeInteger');
$BUILTIN{negativeInteger}->restrict( maxInclusive => -1 );
$BUILTIN{int} = $BUILTIN{integer}->derive(name => 'int');
$BUILTIN{int}->restrict(minInclusive => -2147483648,
maxInclusive => 2147483647);
$BUILTIN{unsignedInt} = $BUILTIN{integer}->derive(name => 'unsignedInt');
$BUILTIN{unsignedInt}->restrict(minInclusive => 0,
maxInclusive => 4294967295);
$BUILTIN{short} = $BUILTIN{int}->derive(name => 'short');
$BUILTIN{short}->restrict(minInclusive => -32768,
maxInclusive => 32767);
$BUILTIN{unsignedShort} = $BUILTIN{unsignedInt}->derive(name =>
'unsignedShort');
$BUILTIN{unsignedShort}->restrict(maxInclusive => 65535);
$BUILTIN{byte} = $BUILTIN{short}->derive(name => 'byte');
$BUILTIN{byte}->restrict(minInclusive => -128,
maxInclusive => 127);
$BUILTIN{unsignedByte} = $BUILTIN{unsignedShort}->derive(name =>
'unsignedByte');
$BUILTIN{unsignedByte}->restrict(maxInclusive => 255);
$BUILTIN{normalizedString} = $BUILTIN{string}->derive(name =>
'normalizedString');
$BUILTIN{normalizedString}->restrict(whiteSpace => 'replace');
$BUILTIN{token} = $BUILTIN{normalizedString}->derive(name => 'token');
$BUILTIN{token}->restrict(whiteSpace => 'collapse');
$BUILTIN{NMTOKEN} = $BUILTIN{token}->derive(name => 'NMTOKEN');
$BUILTIN{NMTOKEN}->restrict(pattern => qr/^[-.:\w\d]*$/);
######################
# SimpleType methods #
######################
# create a new type, filing in the library if named
sub new {
my ($pkg, %arg) = @_;
my $self = bless(\%arg, $pkg);
return $self;
}
# create a type derived from this type
sub derive {
my ($self, @opt) = @_;
my $sub = ref($self)->new(@opt);
$sub->{base} = $self;
return $sub;
}
sub restrict {
my $self = shift;
my $root = $self->root;
while (@_) {
my ($key, $value) = (shift, shift);
# is this a legal restriction? (base types can do whatever they want
_err("Found illegal restriction '$key' on type derived from '$root->{name}'.")
unless ($self == $root) or
($FACET{$key} & $root->{facets});
push @{$self->{restrict}{$key} ||= []}, $value;
}
}
# returns the ultimate base type for this type
sub root {
my $self = shift;
my $p = $self;
while ($p->{base}) {
$p = $p->{base};
}
return $p;
}
sub normalize_ws {
my ($self, $value) = @_;
if ($self->{restrict}{whiteSpace}) {
my $ws = $self->{restrict}{whiteSpace}[0];
if ($ws eq 'replace') {
$value =~ s![\t\n\r]! !g;
} elsif ($ws eq 'collapse') {
$value =~ s!\s+! !g;
$value =~ s!^\s!!g;
$value =~ s!\s$!!g;
}
return $value;
}
return $self->{base}->normalize_ws($value) if $self->{base};
return $value;
}
sub check {
my ($self, $value) = @_;
my $root = $self->root;
my ($ok, $msg);
# first deal with whitespace, necessary before applying facets
$value = $self->normalize_ws($value);
# first check base restrictions
if ($self->{base}) {
($ok, $msg) = $self->{base}->check($value);
return ($ok, $msg) unless $ok;
}
# check various constraints
my $r = $self->{restrict};
if ($r->{length}) {
foreach my $len (@{$r->{length}}) {
return (0, "is not exactly $len characters.")
unless length($value) eq $len;
}
}
if ($r->{maxLength}) {
foreach my $len (@{$r->{maxLength}}) {
return (0, "is longer than maximum $len characters.")
if length($value) > $len;
}
}
if ($r->{minLength}) {
foreach my $len (@{$r->{minLength}}) {
return (0, "is shorter than minimum $len characters.")
if length($value) < $len;
}
}
if ($r->{enumeration}) {
return (0, 'not in allowed list (' .
join(', ', @{$r->{enumeration}}) . ')')
unless grep { $_ eq $value } (@{$r->{enumeration}});
}
if ($r->{pattern}) {
my $pass = 0;
foreach my $pattern (@{$r->{pattern}}) {
if ($value =~ /$pattern/) {
$pass = 1;
last;
}
}
return (0, "does not match required pattern.")
unless $pass;
}
if ($r->{minInclusive}) {
foreach my $min (@{$r->{minInclusive}}) {
return (0, "is below minimum (inclusive) allowed, $min")
if $value < $min;
}
}
if ($r->{minExclusive}) {
foreach my $min (@{$r->{minExclusive}}) {
return (0, "is below minimum allowed, $min")
if $value <= $min;
}
}
if ($r->{maxInclusive}) {
foreach my $max (@{$r->{maxInclusive}}) {
return (0, "is above maximum (inclusive) allowed, $max")
if $value > $max;
}
}
if ($r->{maxExclusive}) {
foreach my $max (@{$r->{maxExclusive}}) {
return (0, "is above maximum allowed, $max")
if $value >= $max;
}
}
if ($r->{totalDigits} or $r->{fractionDigits}) {
# strip leading and trailing zeros for numeric constraints
(my $digits = $value) =~ s/^([+-]?)0*(\d*\.?\d*?)0*$/$1$2/g;
if ($r->{totalDigits}) {
foreach my $tdigits (@{$r->{totalDigits}}) {
return (0, "has more total digits than allowed, $tdigits")
if $digits =~ tr!0-9!! > $tdigits;
}
}
if ($r->{fractionDigits}) {
foreach my $fdigits (@{$r->{fractionDigits}}) {
return (0, "has more fraction digits than allowed, $fdigits")
if $digits =~ /\.\d{$fdigits}\d/;
}
}
}
return (1);
}
#
# begin code taken from Date::Simple
#
my @days_in_month = ([0,31,28,31,30,31,30,31,31,30,31,30,31],
[0,31,29,31,30,31,30,31,31,30,31,30,31]);
sub _validate_date {
my ($y, $m, $d)= @_;
# any +ve integral year is valid
return q{(?!)} if $y != abs int $y;
return q{(?!)} unless 1 <= $m and $m <= 12;
return q{(?!)} unless 1 <= $d and $d <=$days_in_month[_leap_year($y)][$m];
# perl 5.10.0 choked on (?=) here, switching to just returning
# nothing, which should also always match.
return '';
}
sub _leap_year {
my $y = shift;
return (($y%4==0) and ($y%400==0 or $y%100!=0)) || 0;
}
#
# end code taken from Date::Simple
#
1;