# $Id$
##
# this test checks the parsing capabilities of XML::LibXML
# it relies on the success of t/01basic.t
use strict;
use warnings;
# Fix the locale for the error messages check to work:
# See https://rt.cpan.org/Public/Bug/Display.html?id=97805 .
use POSIX qw(locale_h);
use locale;
POSIX::setlocale(LC_ALL, "C");
use Test::More tests => 533;
use IO::File;
use XML::LibXML::Common qw(:libxml);
use XML::LibXML::SAX;
use XML::LibXML::SAX::Builder;
use constant XML_DECL => "<?xml version=\"1.0\"?>\n";
use Errno qw(ENOENT);
##
# test values
my @goodWFStrings = (
'<foobar/>',
'<foobar></foobar>',
XML_DECL . "<foobar></foobar>",
'<?xml version="1.0" encoding="UTF-8"?>'."\n<foobar></foobar>",
'<?xml version="1.0" encoding="ISO-8859-1"?>'."\n<foobar></foobar>",
XML_DECL. "<foobar> </foobar>\n",
XML_DECL. '<foobar><foo/></foobar> ',
XML_DECL. '<foobar> <foo/> </foobar> ',
XML_DECL. '<foobar><![CDATA[<>&"\']]></foobar>',
XML_DECL. '<foobar><>&"'</foobar>',
XML_DECL. '<foobar>  </foobar>',
XML_DECL. '<!--comment--><foobar>foo</foobar>',
XML_DECL. '<foobar>foo</foobar><!--comment-->',
XML_DECL. '<foobar>foo<!----></foobar>',
XML_DECL. '<foobar foo="bar"/>',
XML_DECL. '<foobar foo="\'bar>"/>',
#XML_DECL. '<bar:foobar foo="bar"><bar:foo/></bar:foobar>',
#'<bar:foobar/>'
);
my @goodWFNSStrings = (
XML_DECL. '<foobar xmlns:bar="xml://foo" bar:foo="bar"/>'."\n",
XML_DECL. '<foobar xmlns="xml://foo" foo="bar"><foo/></foobar>'."\n",
XML_DECL. '<bar:foobar xmlns:bar="xml://foo" foo="bar"><foo/></bar:foobar>'."\n",
XML_DECL. '<bar:foobar xmlns:bar="xml://foo" foo="bar"><bar:foo/></bar:foobar>'."\n",
XML_DECL. '<bar:foobar xmlns:bar="xml://foo" bar:foo="bar"><bar:foo/></bar:foobar>'."\n",
);
my @goodWFDTDStrings = (
XML_DECL. '<!DOCTYPE foobar ['."\n".'<!ENTITY foo " test ">'."\n".']>'."\n".'<foobar>&foo;</foobar>',
XML_DECL. '<!DOCTYPE foobar [<!ENTITY foo "bar">]><foobar>&foo;</foobar>',
XML_DECL. '<!DOCTYPE foobar [<!ENTITY foo "bar">]><foobar>&foo;></foobar>',
XML_DECL. '<!DOCTYPE foobar [<!ENTITY foo "bar="foo"">]><foobar>&foo;></foobar>',
XML_DECL. '<!DOCTYPE foobar [<!ENTITY foo "bar">]><foobar>&foo;></foobar>',
XML_DECL. '<!DOCTYPE foobar [<!ENTITY foo "bar">]><foobar foo="&foo;"/>',
XML_DECL. '<!DOCTYPE foobar [<!ENTITY foo "bar">]><foobar foo=">&foo;"/>',
);
my @badWFStrings = (
"", # totally empty document
XML_DECL, # only XML Declaration
"<!--ouch-->", # comment only is like an empty document
'<!DOCTYPE ouch [<!ENTITY foo "bar">]>', # no good either ...
"<ouch>", # single tag (tag mismatch)
"<ouch/>foo", # trailing junk
"foo<ouch/>", # leading junk
"<ouch foo=bar/>", # bad attribute
'<ouch foo="bar/>', # bad attribute
"<ouch>&</ouch>", # bad char
"<ouch>�x20;</ouch>", # bad char
"<foobär/>", # bad encoding
"<ouch>&foo;</ouch>", # undefind entity
"<ouch>></ouch>", # unterminated entity
XML_DECL. '<!DOCTYPE foobar [<!ENTITY foo "bar">]><foobar &foo;="ouch"/>', # bad placed entity
XML_DECL. '<!DOCTYPE foobar [<!ENTITY foo "bar="foo"">]><foobar &foo;/>', # even worse
"<ouch><!---></ouch>", # bad comment
'<ouch><!-----></ouch>', # bad either... (is this conform with the spec????)
);
my %goodPushWF = (
single1 => ['<foobar/>'],
single2 => ['<foobar>','</foobar>'],
single3 => [ XML_DECL, "<foobar>", "</foobar>" ],
single4 => ["<foo", "bar/>"],
single5 => ["<", "foo","bar", "/>"],
single6 => ['<?xml version="1.0" encoding="UTF-8"?>',"\n<foobar/>"],
single7 => ['<?xml',' version="1.0" ','encoding="UTF-8"?>',"\n<foobar/>"],
single8 => ['<foobar', ' foo=', '"bar"', '/>'],
single9 => ['<?xml',' versio','n="1.0" ','encodi','ng="U','TF8"?>',"\n<foobar/>"],
multiple1 => [ '<foobar>','<foo/>','</foobar> ', ],
multiple2 => [ '<foobar','><fo','o','/><','/foobar> ', ],
multiple3 => [ '<foobar>','<![CDATA[<>&"\']]>','</foobar>'],
multiple4 => [ '<foobar>','<![CDATA[', '<>&', ']]>', '</foobar>' ],
multiple5 => [ '<foobar>','<!','[CDA','TA[', '<>&', ']]>', '</foobar>' ],
multiple6 => ['<foobar>','<>&"'','</foobar>'],
multiple6 => ['<foobar>','<',';&','gt;&a','mp;','"&ap','os;','</foobar>'],
multiple7 => [ '<foobar>', '  ','</foobar>' ],
multiple8 => [ '<foobar>', '&#x','20;','60;','</foobar>' ],
multiple9 => [ '<foobar>','moo','moo','</foobar> ', ],
multiple10 => [ '<foobar>','moo','</foobar> ', ],
comment1 => [ '<!--comment-->','<foobar/>' ],
comment2 => [ '<foobar/>','<!--comment-->' ],
comment3 => [ '<!--','comment','-->','<foobar/>' ],
comment4 => [ '<!--','-->','<foobar/>' ],
comment5 => [ '<foobar>fo','o<!---','-><','/foobar>' ],
attr1 => [ '<foobar',' foo="bar"/>'],
attr2 => [ '<foobar',' foo','="','bar','"/>'],
attr3 => [ '<foobar',' fo','o="b','ar"/>'],
#prefix1 => [ '<bar:foobar/>' ],
#prefix2 => [ '<bar',':','foobar/>' ],
#prefix3 => [ '<ba','r:fo','obar/>' ],
ns1 => [ '<foobar xmlns:bar="xml://foo"/>' ],
ns2 => [ '<foobar ','xmlns:bar="xml://foo"','/>' ],
ns3 => [ '<foo','bar x','mlns:b','ar="foo"/>' ],
ns4 => [ '<bar:foobar xmlns:bar="xml://foo"/>' ],
ns5 => [ '<bar:foo','bar xm','lns:bar="fo','o"/>' ],
ns6 => [ '<bar:fooba','r xm','lns:ba','r="foo"','><bar',':foo/','></bar'.':foobar>'],
dtd1 => [XML_DECL, '<!DOCTYPE ','foobar [','<!ENT','ITY foo " test ">',']>','<foobar>&f','oo;</foobar>',],
dtd2 => [XML_DECL, '<!DOCTYPE ','foobar [','<!ENT','ITY foo " test ">',']>','<foobar>&f','oo;></foobar>',],
);
my $goodfile = "example/dromeds.xml";
my $badfile1 = "example/bad.xml";
my $badfile2 = "does_not_exist.xml";
my $parser = XML::LibXML->new();
# 1 NON VALIDATING PARSER
# 1.1 WELL FORMED STRING PARSING
# 1.1.1 DEFAULT VALUES
{
foreach my $str ( @goodWFStrings,@goodWFNSStrings,@goodWFDTDStrings ) {
my $doc = $parser->parse_string($str);
isa_ok($doc, 'XML::LibXML::Document');
}
}
eval { my $fail = $parser->parse_string(undef); };
like($@, qr/^Empty String at/, "parses undef string with an error");
foreach my $str ( @badWFStrings ) {
eval { my $fail = $parser->parse_string($str); };
ok($@, "Error thrown passing '" . shorten_string($str) . "'");
}
# 1.1.2 NO KEEP BLANKS
$parser->keep_blanks(0);
{
foreach my $str ( @goodWFStrings,@goodWFNSStrings,@goodWFDTDStrings ) {
my $doc = $parser->parse_string($str);
isa_ok($doc, 'XML::LibXML::Document');
}
}
eval { my $fail = $parser->parse_string(undef); };
like($@, qr/^Empty String at/, "parses undef string with an error");
foreach my $str ( @badWFStrings ) {
eval { my $fail = $parser->parse_string($str); };
ok($@, "Error thrown passing '" . shorten_string($str) . "'");
}
$parser->keep_blanks(1);
# 1.1.3 EXPAND ENTITIES
$parser->expand_entities(0);
{
foreach my $str ( @goodWFStrings,@goodWFNSStrings,@goodWFDTDStrings ) {
my $doc = $parser->parse_string($str);
isa_ok($doc, 'XML::LibXML::Document');
}
}
eval { my $fail = $parser->parse_string(undef); };
like($@, qr/^Empty String at/, "parses undef string with an error");
foreach my $str ( @badWFStrings ) {
eval { my $fail = $parser->parse_string($str); };
ok($@, "Error thrown passing '" . shorten_string($str) . "'");
}
$parser->expand_entities(1);
# 1.1.4 PEDANTIC
$parser->pedantic_parser(1);
{
foreach my $str ( @goodWFStrings,@goodWFNSStrings,@goodWFDTDStrings ) {
my $doc = $parser->parse_string($str);
isa_ok($doc, 'XML::LibXML::Document');
}
}
eval { my $fail = $parser->parse_string(undef); };
like($@, qr/^Empty String at/, "parses undef string with an error");
foreach my $str ( @badWFStrings ) {
eval { my $fail = $parser->parse_string($str); };
ok($@, "Error thrown passing '" . shorten_string($str) . "'");
}
$parser->pedantic_parser(0);
# 1.2 PARSE A FILE
{
my $doc = $parser->parse_file($goodfile);
isa_ok($doc, 'XML::LibXML::Document');
}
eval {my $fail = $parser->parse_file($badfile1);};
like($@, qr/^$badfile1:3: parser error : Extra content at the end of the document/, "error parsing $badfile1");
{
# This is to fix https://rt.cpan.org/Public/Bug/Display.html?id=69248
# Testing for localised error messages.
$! = ENOENT;
my $err_string = "$!";
$! = 0;
my $re = qr/\ACould not create file parser context for file "\Q$badfile2\E": \Q$err_string\E/;
eval { $parser->parse_file($badfile2); };
like($@, $re, "error parsing non-existent $badfile2");
}
{
my $str = "<a> <b/> </a>";
my $tstr= "<a><b/></a>";
$parser->keep_blanks(0);
my $docA = $parser->parse_string($str);
my $docB = $parser->parse_file("example/test3.xml");
$XML::LibXML::skipXMLDeclaration = 1;
is( $docA->toString, $tstr, "xml string round trips as expected");
is( $docB->toString, $tstr, "test3.xml round trips as expected");
$XML::LibXML::skipXMLDeclaration = 0;
}
# 1.3 PARSE A HANDLE
my $fh = IO::File->new($goodfile);
isa_ok($fh, 'IO::File');
my $doc = $parser->parse_fh($fh);
isa_ok($doc, 'XML::LibXML::Document');
$fh = IO::File->new($badfile1);
isa_ok($fh, 'IO::File');
eval { my $doc = $parser->parse_fh($fh); };
like($@, qr/^Entity: line 3: parser error : Extra content at the end of the document/, "error parsing bad file from file handle of $badfile1");
$fh = IO::File->new($badfile2);
eval { my $doc = $parser->parse_fh($fh); };
like($@, qr/^Can't use an undefined value as a symbol reference at/, "error parsing bad file from file handle of non-existent $badfile2");
{
$parser->expand_entities(1);
my $doc = $parser->parse_file( "example/dtd.xml" );
my @cn = $doc->documentElement->childNodes;
is( scalar @cn, 1, "1 child node" );
$doc = $parser->parse_file( "example/complex/complex2.xml" );
@cn = $doc->documentElement->childNodes;
is( scalar @cn, 1, "1 child node" );
$parser->expand_entities(0);
$doc = $parser->parse_file( "example/dtd.xml" );
@cn = $doc->documentElement->childNodes;
is( scalar @cn, 3, "3 child nodes" );
}
# 1.4 x-include processing
my $goodXInclude = q{
<x>
<xinclude:include
xmlns:xinclude="http://www.w3.org/2001/XInclude"
href="test2.xml"/>
</x>
};
my $badXInclude = q{
<x xmlns:xinclude="http://www.w3.org/2001/XInclude">
<xinclude:include href="bad.xml"/>
</x>
};
{
$parser->base_uri( "example/" );
$parser->keep_blanks(0);
my $doc = $parser->parse_string( $goodXInclude );
isa_ok($doc, 'XML::LibXML::Document');
my $i;
eval { $i = $parser->processXIncludes($doc); };
is( $i, "1", "return value from processXIncludes == 1");
$doc = $parser->parse_string( $badXInclude );
$i= undef;
eval { $i = $parser->processXIncludes($doc); };
like($@, qr/$badfile1:3: parser error : Extra content at the end of the document/, "error parsing a bad include");
# auto expand
$parser->expand_xinclude(1);
$doc = $parser->parse_string( $goodXInclude );
isa_ok($doc, 'XML::LibXML::Document');
$doc = undef;
eval { $doc = $parser->parse_string( $badXInclude ); };
like($@, qr/$badfile1:3: parser error : Extra content at the end of the document/, "error parsing $badfile1 in include");
is($doc, undef, "no doc returned");
# some bad stuff
eval{ $parser->processXIncludes(undef); };
like($@, qr/^No document to process! at/, "Error parsing undef include");
eval{ $parser->processXIncludes("blahblah"); };
like($@, qr/^No document to process! at/, "Error parsing bogus include");
}
# 2 PUSH PARSER
{
my $pparser = XML::LibXML->new();
# 2.1 PARSING WELLFORMED DOCUMENTS
foreach my $key ( qw(single1 single2 single3 single4 single5 single6
single7 single8 single9 multiple1 multiple2 multiple3
multiple4 multiple5 multiple6 multiple7 multiple8
multiple9 multiple10 comment1 comment2 comment3
comment4 comment5 attr1 attr2 attr3
ns1 ns2 ns3 ns4 ns5 ns6 dtd1 dtd2) ) {
foreach ( @{$goodPushWF{$key}} ) {
$pparser->parse_chunk( $_ );
}
my $doc;
eval {$doc = $pparser->parse_chunk("",1); };
is($@, '', "No error parsing $key");
isa_ok($doc, 'XML::LibXML::Document', "Document came back parsing chunk: ");
}
my @good_strings = ("<foo>", "bar", "</foo>" );
my %bad_strings = (
predocend1 => ["<A>" ],
predocend2 => ["<A>", "B"],
predocend3 => ["<A>", "<C>"],
predocend4 => ["<A>", "<C/>"],
postdocend1 => ["<A/>", "<C/>"],
# use with libxml2 2.4.26: postdocend2 => ["<A/>", "B"], # libxml2 < 2.4.26 bug
postdocend3 => ["<A/>", "BB"],
badcdata => ["<A> ","<!","[CDATA[B]","</A>"],
badending1 => ["<A> ","B","</C>"],
badending2 => ["<A> ","</C>","</A>"],
);
my $parser = XML::LibXML->new;
{
for ( @good_strings ) {
$parser->parse_chunk( $_ );
}
my $doc = $parser->parse_chunk("",1);
isa_ok($doc, 'XML::LibXML::Document');
}
{
# 2.2 PARSING BROKEN DOCUMENTS
my $doc;
foreach my $key ( keys %bad_strings ) {
$doc = undef;
my $bad_chunk;
foreach ( @{$bad_strings{$key}} ) {
eval { $parser->parse_chunk( $_ );};
if ( $@ ) {
# if we won't stop here, we will lose the error :|
$bad_chunk = $_;
last;
}
}
if ( $@ ) {
isnt($@, '', "Error found parsing chunk $bad_chunk");
# $parser->parse_chunk("",1); # will cause no harm anymore, but is still needed
next;
}
eval {
$doc = $parser->parse_chunk("",1);
};
isnt($@, '', "Got an error parsing empty chunk after chunks for $key");
}
}
{
# 2.3 RECOVERING PUSH PARSER
$parser->init_push;
foreach ( "<A>", "B" ) {
$parser->push( $_);
}
my $doc;
eval {
local $SIG{'__WARN__'} = sub { };
$doc = $parser->finish_push(1);
};
isa_ok( $doc, 'XML::LibXML::Document' );
}
}
# 3 SAX PARSER
{
my $handler = XML::LibXML::SAX::Builder->new();
my $generator = XML::LibXML::SAX->new( Handler=>$handler );
my $string = q{<bar foo="bar">foo</bar>};
$doc = $generator->parse_string( $string );
isa_ok( $doc , 'XML::LibXML::Document');
# 3.1 GENERAL TESTS
foreach my $str ( @goodWFStrings ) {
my $doc = $generator->parse_string( $str );
isa_ok( $doc , 'XML::LibXML::Document');
}
# CDATA Sections
$string = q{<foo><![CDATA[&foo<bar]]></foo>};
$doc = $generator->parse_string( $string );
my @cn = $doc->documentElement->childNodes();
is( scalar @cn, 1, "Child nodes - 1" );
is( $cn[0]->nodeType, XML_CDATA_SECTION_NODE );
is( $cn[0]->textContent, "&foo<bar" );
is( $cn[0]->toString, '<![CDATA[&foo<bar]]>');
# 3.2 NAMESPACE TESTS
my $i = 0;
foreach my $str ( @goodWFNSStrings ) {
my $doc = $generator->parse_string( $str );
isa_ok( $doc , 'XML::LibXML::Document');
# skip the nested node tests until there is a xmlNormalizeNs().
#ok(1),next if $i > 2;
is( $doc->toString(), $str );
$i++
}
# DATA CONSISTENCE
# find out if namespaces are there
my $string2 = q{<foo xmlns:bar="http://foo.bar">bar<bar:bi/></foo>};
$doc = $generator->parse_string( $string2 );
my @attrs = $doc->documentElement->attributes;
is(scalar @attrs , 1, "1 attribute");
is( $attrs[0]->nodeType, XML_NAMESPACE_DECL, "Node type: " . XML_NAMESPACE_DECL );
my $root = $doc->documentElement;
# bad thing: i have to do some NS normalizing.
# libxml2 will only do some fixing. this will lead to multiple
# declarations, if a node with a new namespace is added.
my $vstring = q{<foo xmlns:bar="http://foo.bar">bar<bar:bi/></foo>};
# my $vstring = q{<foo xmlns:bar="http://foo.bar">bar<bar:bi xmlns:bar="http://foo.bar"/></foo>};
is($root->toString, $vstring );
# 3.3 INTERNAL SUBSETS
foreach my $str ( @goodWFDTDStrings ) {
my $doc = $generator->parse_string( $str );
isa_ok( $doc , 'XML::LibXML::Document');
}
# 3.5 PARSE URI
$doc = $generator->parse_uri( "example/test.xml" );
isa_ok($doc, 'XML::LibXML::Document');
# 3.6 PARSE CHUNK
}
# 4 SAXY PUSHER
{
my $handler = XML::LibXML::SAX::Builder->new();
my $parser = XML::LibXML->new;
$parser->set_handler( $handler );
$parser->push( '<foo/>' );
my $doc = $parser->finish_push;
isa_ok($doc , 'XML::LibXML::Document');
foreach my $key ( keys %goodPushWF ) {
foreach ( @{$goodPushWF{$key}} ) {
$parser->push( $_);
}
my $doc;
eval {$doc = $parser->finish_push; };
isa_ok( $doc , 'XML::LibXML::Document');
}
}
# 5 PARSE WELL BALANCED CHUNKS
{
my $MAX_WF_C = 11;
my $MAX_WB_C = 16;
my %chunks = (
wellformed1 => '<A/>',
wellformed2 => '<A></A>',
wellformed3 => '<A B="C"/>',
wellformed4 => '<A>D</A>',
wellformed5 => '<A><![CDATA[D]]></A>',
wellformed6 => '<A><!--D--></A>',
wellformed7 => '<A><K/></A>',
wellformed8 => '<A xmlns="xml://E"/>',
wellformed9 => '<F:A xmlns:F="xml://G" F:A="B">D</F:A>',
wellformed10 => '<!--D-->',
wellformed11 => '<A xmlns:F="xml://E"/>',
wellbalance1 => '<A/><A/>',
wellbalance2 => '<A></A><A></A>',
wellbalance3 => '<A B="C"/><A B="H"/>',
wellbalance4 => '<A>D</A><A>I</A>',
wellbalance5 => '<A><K/></A><A><L/></A>',
wellbalance6 => '<A><![CDATA[D]]></A><A><![CDATA[I]]></A>',
wellbalance7 => '<A><!--D--></A><A><!--I--></A>',
wellbalance8 => '<F:A xmlns:F="xml://G" F:A="B">D</F:A><J:A xmlns:J="xml://G" J:A="M">D</J:A>',
wellbalance9 => 'D<A/>',
wellbalance10=> 'D<A/>D',
wellbalance11=> 'D<A/><!--D-->',
wellbalance12=> 'D<A/><![CDATA[D]]>',
wellbalance13=> '<![CDATA[D]]><A/>D',
wellbalance14=> '<!--D--><A/>',
wellbalance15=> '<![CDATA[D]]>',
wellbalance16=> 'D',
);
my @badWBStrings = (
"",
"<ouch>",
"<ouch>bar",
"bar</ouch>",
"<ouch/>&foo;", # undefined entity
"&", # bad char
"häh?", # bad encoding
"<!--->", # bad stays bad ;)
"<!----->", # bad stays bad ;)
);
my $pparser = XML::LibXML->new;
# 5.1 DOM CHUNK PARSER
for ( 1..$MAX_WF_C ) {
my $frag = $pparser->parse_xml_chunk($chunks{'wellformed'.$_});
isa_ok($frag, 'XML::LibXML::DocumentFragment');
if ( $frag->nodeType == XML_DOCUMENT_FRAG_NODE
&& $frag->hasChildNodes ) {
if ( $frag->firstChild->isSameNode( $frag->lastChild ) ) {
if ( $chunks{'wellformed' . $_} =~ /\<A\>\<\/A\>/ ) {
$_--; # because we cannot distinguish between <a/> and <a></a>
}
is($frag->toString, $chunks{'wellformed' . $_}, $chunks{'wellformed' . $_} . " is well formed");
next;
}
}
fail("Unexpected fragment without child nodes");
}
for ( 1..$MAX_WB_C ) {
my $frag = $pparser->parse_xml_chunk($chunks{'wellbalance'.$_});
isa_ok($frag, 'XML::LibXML::DocumentFragment');
if ( $frag->nodeType == XML_DOCUMENT_FRAG_NODE
&& $frag->hasChildNodes ) {
if ( $chunks{'wellbalance'.$_} =~ /<A><\/A>/ ) {
$_--;
}
is($frag->toString, $chunks{'wellbalance'.$_}, $chunks{'wellbalance'.$_} . " is well balanced");
next;
}
fail("Can't test balancedness");
}
eval { my $fail = $pparser->parse_xml_chunk(undef); };
like($@, qr/^Empty String at/, "error parsing undef xml chunk");
eval { my $fail = $pparser->parse_xml_chunk(""); };
like($@, qr/^Empty String at/, "error parsing empty xml chunk");
foreach my $str ( @badWBStrings ) {
eval { my $fail = $pparser->parse_xml_chunk($str); };
isnt($@, '', "Error parsing xml chunk: '" . shorten_string($str) . "'");
}
{
# 5.1.1 Segmenation fault tests
my $sDoc = '<C/><D/>';
my $sChunk = '<A/><B/>';
my $parser = XML::LibXML->new();
my $doc = $parser->parse_xml_chunk( $sDoc, undef );
my $chk = $parser->parse_xml_chunk( $sChunk,undef );
my $fc = $doc->firstChild;
$doc->appendChild( $chk );
is( $doc->toString(), '<C/><D/><A/><B/>', 'No segfault parsing string "<C/><D/><A/><B/>"');
}
{
# 5.1.2 Segmenation fault tests
my $sDoc = '<C/><D/>';
my $sChunk = '<A/><B/>';
my $parser = XML::LibXML->new();
my $doc = $parser->parse_xml_chunk( $sDoc, undef );
my $chk = $parser->parse_xml_chunk( $sChunk,undef );
my $fc = $doc->firstChild;
$doc->insertAfter( $chk, $fc );
is( $doc->toString(), '<C/><A/><B/><D/>', 'No segfault parsing string "<C/><A/><B/><D/>"');
}
{
# 5.1.3 Segmenation fault tests
my $sDoc = '<C/><D/>';
my $sChunk = '<A/><B/>';
my $parser = XML::LibXML->new();
my $doc = $parser->parse_xml_chunk( $sDoc, undef );
my $chk = $parser->parse_xml_chunk( $sChunk,undef );
my $fc = $doc->firstChild;
$doc->insertBefore( $chk, $fc );
ok( $doc->toString(), '<A/><B/><C/><D/>' );
}
pass("Made it to SAX test without seg fault");
# 5.2 SAX CHUNK PARSER
my $handler = XML::LibXML::SAX::Builder->new();
my $parser = XML::LibXML->new;
$parser->set_handler( $handler );
for ( 1..$MAX_WF_C ) {
my $frag = $parser->parse_xml_chunk($chunks{'wellformed'.$_});
isa_ok($frag, 'XML::LibXML::DocumentFragment');
if ( $frag->nodeType == XML_DOCUMENT_FRAG_NODE
&& $frag->hasChildNodes ) {
if ( $frag->firstChild->isSameNode( $frag->lastChild ) ) {
if ( $chunks{'wellformed'.$_} =~ /\<A\>\<\/A\>/ ) {
$_--;
}
is($frag->toString, $chunks{'wellformed'.$_}, $chunks{'wellformed'.$_} . ' is well formed');
next;
}
}
fail("Couldn't pass well formed test since frag was bad");
}
for ( 1..$MAX_WB_C ) {
my $frag = $parser->parse_xml_chunk($chunks{'wellbalance'.$_});
isa_ok($frag, 'XML::LibXML::DocumentFragment');
if ( $frag->nodeType == XML_DOCUMENT_FRAG_NODE
&& $frag->hasChildNodes ) {
if ( $chunks{'wellbalance'.$_} =~ /<A><\/A>/ ) {
$_--;
}
is($frag->toString, $chunks{'wellbalance'.$_}, $chunks{'wellbalance'.$_} . " is well balanced");
next;
}
fail("Couldn't pass well balanced test since frag was bad");
}
}
{
# 6 VALIDATING PARSER
my %badstrings = (
SIMPLE => '<?xml version="1.0"?>'."\n<A/>\n",
);
my $parser = XML::LibXML->new;
$parser->validation(1);
my $doc;
eval { $doc = $parser->parse_string($badstrings{SIMPLE}); };
isnt($@, '', "Failed to parse SIMPLE bad string");
my $ql;
}
{
# 7 LINE NUMBERS
my $goodxml = <<EOXML;
<?xml version="1.0"?>
<foo>
<bar/>
</foo>
EOXML
my $badxml = <<EOXML;
<?xml version="1.0"?>
<!DOCTYPE foo [<!ELEMENT foo EMPTY>]>
<bar/>
EOXML
my $parser = XML::LibXML->new;
$parser->validation(1);
eval { $parser->parse_string( $badxml ); };
# correct line number may or may not be present
# depending on libxml2 version
like($@, qr/^:[03]:/, "line 03 found in error" );
$parser->line_numbers(1);
eval { $parser->parse_string( $badxml ); };
like($@, qr/^:3:/, "line 3 found in error");
# switch off validation for the following tests
$parser->validation(0);
my $doc;
eval { $doc = $parser->parse_string( $goodxml ); };
my $root = $doc->documentElement();
is( $root->line_number(), 2, "line number is 2");
my @kids = $root->childNodes();
is( $kids[1]->line_number(),3, "line number is 3" );
my $newkid = $root->appendChild( $doc->createElement( "bar" ) );
is( $newkid->line_number(), 0, "line number is 0");
$parser->line_numbers(0);
eval { $doc = $parser->parse_string( $goodxml ); };
$root = $doc->documentElement();
is( $root->line_number(), 0, "line number is 0");
@kids = $root->childNodes();
is( $kids[1]->line_number(), 0, "line number is 0");
}
SKIP: {
skip("LibXML version is below 20600", 8) unless ( XML::LibXML::LIBXML_VERSION >= 20600 );
# 8 Clean Namespaces
my ( $xsDoc1, $xsDoc2 );
$xsDoc1 = q{<A:B xmlns:A="http://D"><A:C xmlns:A="http://D"></A:C></A:B>};
$xsDoc2 = q{<A:B xmlns:A="http://D"><A:C xmlns:A="http://E"/></A:B>};
my $parser = XML::LibXML->new();
$parser->clean_namespaces(1);
my $fn1 = "example/xmlns/goodguy.xml";
my $fn2 = "example/xmlns/badguy.xml";
is( $parser->parse_string( $xsDoc1 )->documentElement->toString(),
q{<A:B xmlns:A="http://D"><A:C/></A:B>} );
is( $parser->parse_string( $xsDoc2 )->documentElement->toString(),
$xsDoc2 );
is( $parser->parse_file( $fn1 )->documentElement->toString(),
q{<A:B xmlns:A="http://D"><A:C/></A:B>} );
is( $parser->parse_file( $fn2 )->documentElement->toString() ,
$xsDoc2 );
my $fh1 = IO::File->new($fn1);
my $fh2 = IO::File->new($fn2);
is( $parser->parse_fh( $fh1 )->documentElement->toString(),
q{<A:B xmlns:A="http://D"><A:C/></A:B>} );
is( $parser->parse_fh( $fh2 )->documentElement->toString() ,
$xsDoc2 );
my @xaDoc1 = ('<A:B xmlns:A="http://D">','<A:C xmlns:A="h','ttp://D"/>' ,'</A:B>');
my @xaDoc2 = ('<A:B xmlns:A="http://D">','<A:C xmlns:A="h','ttp://E"/>' , '</A:B>');
my $doc;
foreach ( @xaDoc1 ) {
$parser->parse_chunk( $_ );
}
$doc = $parser->parse_chunk( "", 1 );
is( $doc->documentElement->toString(),
q{<A:B xmlns:A="http://D"><A:C/></A:B>} );
foreach ( @xaDoc2 ) {
$parser->parse_chunk( $_ );
}
$doc = $parser->parse_chunk( "", 1 );
is( $doc->documentElement->toString() ,
$xsDoc2 );
};
##
# test if external subsets are loaded correctly
{
my $xmldoc = <<EOXML;
<!DOCTYPE X SYSTEM "example/ext_ent.dtd">
<X>&foo;</X>
EOXML
my $parser = XML::LibXML->new();
$parser->load_ext_dtd(1);
# first time it should work
my $doc = $parser->parse_string( $xmldoc );
is( $doc->documentElement()->string_value(), " test " );
# second time it must not fail.
my $doc2 = $parser->parse_string( $xmldoc );
is( $doc2->documentElement()->string_value(), " test " );
}
##
# Test ticket #7668 xinclude breaks entity expansion
# [CG] removed again, since #7668 claims the spec is incorrect
##
# Test ticket #7913
{
my $xmldoc = <<EOXML;
<!DOCTYPE X SYSTEM "example/ext_ent.dtd">
<X>&foo;</X>
EOXML
my $parser = XML::LibXML->new();
$parser->load_ext_dtd(1);
# first time it should work
my $doc = $parser->parse_string( $xmldoc );
is( $doc->documentElement()->string_value(), " test " );
# lets see if load_ext_dtd(0) works
$parser->load_ext_dtd(0);
my $doc2;
eval {
$doc2 = $parser->parse_string( $xmldoc );
};
isnt($@, '', "error parsing $xmldoc");
$parser->validation(1);
$parser->load_ext_dtd(0);
my $doc3;
eval {
$doc3 = $parser->parse_file( "example/article_external_bad.xml" );
};
isa_ok( $doc3, 'XML::LibXML::Document');
$parser->load_ext_dtd(1);
eval {
$doc3 = $parser->parse_file( "example/article_external_bad.xml" );
};
isnt($@, '', "error parsing example/article_external_bad.xml");
}
{
my $parser = XML::LibXML->new();
my $doc = $parser->parse_string('<foo xml:base="foo.xml"/>',"bar.xml");
my $el = $doc->documentElement;
is( $doc->URI, "bar.xml" );
is( $doc->baseURI, "bar.xml" );
is( $el->baseURI, "foo.xml" );
$doc->setURI( "baz.xml" );
is( $doc->URI, "baz.xml" );
is( $doc->baseURI, "baz.xml" );
is( $el->baseURI, "foo.xml" );
$doc->setBaseURI( "bag.xml" );
is( $doc->URI, "bag.xml" );
is( $doc->baseURI, "bag.xml" );
is( $el->baseURI, "foo.xml" );
$el->setBaseURI( "bam.xml" );
is( $doc->URI, "bag.xml" );
is( $doc->baseURI, "bag.xml" );
is( $el->baseURI, "bam.xml" );
}
{
my $parser = XML::LibXML->new();
my $doc = $parser->parse_html_string('<html><head><base href="foo.html"></head><body></body></html>',{ URI => "bar.html" });
my $el = $doc->documentElement;
is( $doc->URI, "bar.html" );
is( $doc->baseURI, "foo.html" );
is( $el->baseURI, "foo.html" );
$doc->setURI( "baz.html" );
is( $doc->URI, "baz.html" );
is( $doc->baseURI, "foo.html" );
is( $el->baseURI, "foo.html" );
}
{
my $parser = XML::LibXML->new();
open(my $fh, '<:utf8', 't/data/chinese.xml');
ok( $fh, 'open chinese.xml');
eval {
$parser->parse_fh($fh);
};
like( $@, qr/Read more bytes than requested/,
'UTF-8 encoding layer throws exception' );
close($fh);
}
sub tsub {
my $doc = shift;
my $th = {};
$th->{d} = XML::LibXML::Document->createDocument;
my $e1 = $th->{d}->createElementNS("x","X:foo");
$th->{d}->setDocumentElement( $e1 );
my $e2 = $th->{d}->createElementNS( "x","X:bar" );
$e1->appendChild( $e2 );
$e2->appendChild( $th->{d}->importNode( $doc->documentElement() ) );
return $th->{d};
}
sub tsub2 {
my ($doc,$query)=($_[0],@{$_[1]});
# return [ $doc->findnodes($query) ];
return [ $doc->findnodes(encodeToUTF8('iso-8859-1',$query)) ];
}
sub shorten_string { # Used for test naming.
my $string = shift;
return "'undef'" if(!defined $string);
$string =~ s/\n/\\n/msg;
return $string if(length($string) < 25);
return $string = substr($string, 0, 10) . "..." . substr($string, -10);
}