use strict;
use XML::Rules;
use Data::Dumper;
my %rules;
my $parser = XML::Rules->new(
namespaces => { '*' => ''},
rules => {
_default => sub {
my ($tag, $attrs, $context, $parent_data, $parser) = @_;
my $repeated = (exists $parent_data->[-1] and exists $parent_data->[-1]{$tag});
my $has_content = (exists $attrs->{_content});
my $has_children = grep ref($_) eq 'HASH', values %$attrs;
my $has_attr = grep {$_ ne '_content' and !ref($attrs->{$_})} keys %$attrs;
my $rule = do {
if ($repeated) {
if ($has_content) {
if ($has_attr or $has_children) {
'as array'
} else {
'content array'
}
} else {
if ($has_attr or $has_children) {
'as array no content'
} else {
'content array'
}
}
} else {
if ($has_content) {
if ($has_attr or $has_children) {
'as is'
} else {
'content'
}
} else {
if ($has_attr or $has_children) {
'no content'
} else {
'content'
}
}
}
};
if (not exists $rules{$tag}) {
$rules{$tag} = $rule
} elsif($rules{$tag} ne $rule) {
# we've already seen the tag and it had different type
if ($rules{$tag} eq 'raw extended array') {
} elsif ($rule eq 'raw extended array') {
$rules{$tag} = 'raw extended array';
} elsif ($rules{$tag} eq 'raw extended' and $rule =~ /array/
or $rule eq 'raw extended' and $rules{$tag} =~ /array/) {
$rules{$tag} = 'raw extended array'
} elsif ($rules{$tag} eq 'as array' or $rule eq 'as array') {
$rules{$tag} = 'as array'
} elsif ($rules{$tag} eq 'content array' and $rule eq 'content'
or $rule eq 'content array' and $rules{$tag} eq 'content') {
$rules{$tag} = 'content array'
} elsif ($rules{$tag} eq 'content array' and $rule eq 'as array no content'
or $rule eq 'content array' and $rules{$tag} eq 'as array no content') {
$rules{$tag} = 'as array'
} elsif ($rules{$tag} eq 'content array' and $rule eq 'as is'
or $rule eq 'content array' and $rules{$tag} eq 'as is') {
$rules{$tag} = 'as array'
} elsif ($rules{$tag} eq 'content array' and $rule eq 'no content'
or $rule eq 'content array' and $rules{$tag} eq 'no content') {
$rules{$tag} = 'as array'
} elsif ($rules{$tag} eq 'as array no content' and $rule eq 'as is'
or $rule eq 'as array no content' and $rules{$tag} eq 'as is') {
$rules{$tag} = 'as array'
} elsif ($rules{$tag} eq 'as array no content' and $rule eq 'content'
or $rule eq 'as array no content' and $rules{$tag} eq 'content') {
$rules{$tag} = 'as array'
} elsif ($rules{$tag} eq 'as array no content' and $rule eq 'no content'
or $rule eq 'as array no content' and $rules{$tag} eq 'no content') {
$rules{$tag} = 'as array no content'
} elsif ($rules{$tag} eq 'as is' and ($rule eq 'no content' or $rule eq 'content')
or $rule eq 'as is' and ($rules{$tag} eq 'no content' or $rules{$tag} eq 'content')) {
$rules{$tag} = 'as is'
} elsif ($rules{$tag} eq 'content' and $rule eq 'no content'
or $rule eq 'content' and $rules{$tag} eq 'no content') {
$rules{$tag} = 'as is'
} else {
die "Unexpected combination of rules: old=$rules{$tag}, new=$rule for tag $tag\n";
}
}
if ($has_content and $has_children) { # the tag contains both text content and subtags!, need to use the raw extended rules
foreach my $child (grep ref($attrs->{$_}) eq 'HASH', keys %$attrs) {
next if $rules{$child} =~ /^raw extended/;
if ($rules{$child} =~ /array/) {
$rules{$child} = 'raw extended array'
} else {
$rules{$child} = 'raw extended'
}
}
}
return $tag => {};
}
},
stripspaces => 7,
);
for (@ARGV) {
eval {
$parser->parsefile($_);
} or print STDERR "Error parsing $_: $@\n";
}
my %short_rules;
foreach my $tag (sort keys %rules) {
push @{$short_rules{$rules{$tag}}}, $tag
}
foreach my $tags (values %short_rules) {
$tags = join ',', @$tags;
}
%short_rules = reverse %short_rules;
print Dumper(\%short_rules);