The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
#!/usr/bin/env perl
# Check examples in http://www.w3.org/TR/2008/REC-xml-c14n11-20080502
use warnings;
use strict;
use utf8;

use Test::More tests => 8;

use XML::Compile::Cache;
use XML::Compile::Tester     qw/compare_xml/;
use XML::Compile::C14N       ();
use XML::Compile::C14N::Util ':c14n';

#use Encode                   qw/encode decode/;

my $cache = XML::Compile::Cache->new;
my $c14n  = XML::Compile::C14N->new(schema => $cache);

sub to_xml($) { XML::LibXML->load_xml(string => $_[0]) }

### Example 3.1

my $in1  = to_xml <<'_INPUT_DOCUMENT';
<?xml version="1.0"?>

<?xml-stylesheet   href="doc.xsl"
   type="text/xsl"   ?>

<!DOCTYPE doc SYSTEM "doc.dtd">

<doc>Hello, world!<!-- Comment 1 --></doc>

<?pi-without-data     ?>

<!-- Comment 2 -->

<!-- Comment 3 -->
_INPUT_DOCUMENT

my $out1a = $c14n->normalize(C14N_v11_NO_COMM, $in1);
is($out1a."\n", <<'_NO_COMM');
<?xml-stylesheet href="doc.xsl"
   type="text/xsl"   ?>
<doc>Hello, world!</doc>
<?pi-without-data?>
_NO_COMM

my $out1b = $c14n->normalize(C14N_v11_COMMENTS, $in1);
is($out1b."\n", <<'_WITH_COMM');
<?xml-stylesheet href="doc.xsl"
   type="text/xsl"   ?>
<doc>Hello, world!<!-- Comment 1 --></doc>
<?pi-without-data?>
<!-- Comment 2 -->
<!-- Comment 3 -->
_WITH_COMM

### Example 3.2

my $in2 = to_xml <<'_INPUT_DOCUMENT';
<doc>
   <clean>   </clean>
   <dirty>   A   B   </dirty>
   <mixed>
      A
      <clean>   </clean>
      B
      <dirty>   A   B   </dirty>
      C
   </mixed>
</doc>
_INPUT_DOCUMENT

my $out2 = $c14n->normalize(C14N_v11_COMMENTS, $in2);
#is($out2, encode utf8 => $in2->documentElement);
is($out2, $in2->documentElement->toString);

### Example 3.3

my $in3 = to_xml <<'_INPUT_DOCUMENT';
<!DOCTYPE doc [<!ATTLIST e9 attr CDATA "default">]>
<doc>
   <e1   />
   <e2   ></e2>
   <e3   name = "elem3"   id="elem3"   />
   <e4   name="elem4"   id="elem4"   ></e4>
   <e5 a:attr="out" b:attr="sorted" attr2="all" attr="I'm"
      xmlns:b="http://www.ietf.org"
      xmlns:a="http://www.w3.org"
      xmlns="http://example.org"/>
   <e6 xmlns="" xmlns:a="http://www.w3.org">
      <e7 xmlns="http://www.ietf.org">
         <e8 xmlns="" xmlns:a="http://www.w3.org">
            <e9 xmlns="" xmlns:a="http://www.ietf.org"/>
         </e8>
      </e7>
   </e6>
</doc>
_INPUT_DOCUMENT

my $out3 = $c14n->normalize(C14N_v11_COMMENTS, $in3);
is($out3."\n", <<'_CANON');
<doc>
   <e1></e1>
   <e2></e2>
   <e3 id="elem3" name="elem3"></e3>
   <e4 id="elem4" name="elem4"></e4>
   <e5 xmlns="http://example.org" xmlns:a="http://www.w3.org" xmlns:b="http://www.ietf.org" attr="I'm" attr2="all" b:attr="sorted" a:attr="out"></e5>
   <e6 xmlns:a="http://www.w3.org">
      <e7 xmlns="http://www.ietf.org">
         <e8 xmlns="">
            <e9 xmlns:a="http://www.ietf.org"></e9>
         </e8>
      </e7>
   </e6>
</doc>
_CANON

# NB: the specification says
#    -      <e9 xmlns:a="http://www.ietf.org"></e9>
#    +      <e9 xmlns:a="http://www.ietf.org" attr="default"></e9>
# But I think that is a bug: the default ns is already set.

### Example 3.4

my $in4 = to_xml <<'_INPUT_DOCUMENT';
<!DOCTYPE doc [
<!ATTLIST normId id ID #IMPLIED>
<!ATTLIST normNames attr NMTOKENS #IMPLIED>
]>
<doc>
   <text>First line&#x0d;&#10;Second line</text>
   <value>&#x32;</value>
   <compute><![CDATA[value>"0" && value<"10" ?"valid":"error"]]></compute>
   <compute expr='value>"0" &amp;&amp; value&lt;"10" ?"valid":"error"'>valid</compute>
   <norm attr=' &apos;   &#x20;&#13;&#xa;&#9;   &apos; '/>
   <normNames attr='   A   &#x20;&#13;&#xa;&#9;   B   '/>
   <normId id=' &apos;   &#x20;&#13;&#xa;&#9;   &apos; '/>
</doc>
_INPUT_DOCUMENT

my $out4 = $c14n->normalize(C14N_v11_COMMENTS, $in4);
is($out4."\n", <<'_CANON');
<doc>
   <text>First line&#xD;
Second line</text>
   <value>2</value>
   <compute>value&gt;"0" &amp;&amp; value&lt;"10" ?"valid":"error"</compute>
   <compute expr="value>&quot;0&quot; &amp;&amp; value&lt;&quot;10&quot; ?&quot;valid&quot;:&quot;error&quot;">valid</compute>
   <norm attr=" '    &#xD;&#xA;&#x9;   ' "></norm>
   <normNames attr="A &#xD;&#xA;&#x9; B"></normNames>
   <normId id="' &#xD;&#xA;&#x9; '"></normId>
</doc>
_CANON


### Example 3.5

my $in5 = to_xml <<'_INPUT_DOCUMENT';
<!DOCTYPE doc [
<!ATTLIST doc attrExtEnt ENTITY #IMPLIED>
<!ENTITY ent1 "Hello">
<!ENTITY ent2 SYSTEM "world.txt">
<!ENTITY entExt SYSTEM "earth.gif" NDATA gif>
<!NOTATION gif SYSTEM "viewgif.exe">
]>
<doc attrExtEnt="entExt">
   &ent1;, &ent2;!
</doc>

<!-- Let world.txt contain "world" (excluding the quotes) --> 
_INPUT_DOCUMENT

my $out5 = $c14n->normalize(C14N_v11_NO_COMM, $in5);
is($out5."\n", <<'_CANON');
<doc attrExtEnt="entExt">
   Hello, world!
</doc>
_CANON


### Example 3.6

my $in6 = to_xml <<'_INPUT_DOCUMENT';
<?xml version="1.0" encoding="ISO-8859-1"?>
<doc>&#169;</doc> 
_INPUT_DOCUMENT

my $out6 = $c14n->normalize(C14N_v11_NO_COMM, $in6);
is($out6."\n", <<'_CANON');
<doc>©</doc>
_CANON


### Example 3.7

my $in7 = to_xml <<'_INPUT_DOCUMENT';
<!DOCTYPE doc [
<!ATTLIST e2 xml:space (default|preserve) 'preserve'>
<!ATTLIST e3 id ID #IMPLIED>
]>
<doc xmlns="http://www.ietf.org" xmlns:w3c="http://www.w3.org">
   <e1>
      <e2 xmlns="">
         <e3 id="E3"/>
      </e2>
   </e1>
</doc>
_INPUT_DOCUMENT

my $xpath7 = <<'_XPATH';
(//. | //@* | //namespace::*)
[
   self::ietf:e1 or (parent::ietf:e1 and not(self::text() or self::e2))
   or
   count(id("E3")|ancestor-or-self::node()) = count(ancestor-or-self::node())
]
_XPATH

my $context7 = XML::LibXML::XPathContext->new;
$context7->registerNs(ietf => 'http://www.ietf.org');

TODO: {
   diag "**** I know of this error, but do not know how to fix it!";
   diag "**** it is an example taken from the c14n spec.  Can you help?";

   local $TODO = 'No idea how this should work';
   my $out7 = eval { $c14n->normalize(C14N_v11_NO_COMM, $in7, xpath => $xpath7,
      context => $context7) };

   no warnings;
   is($out7."\n", <<'_CANON');
<e1 xmlns="http://www.ietf.org" xmlns:w3c="http://www.w3.org"><e3 xmlns="" id="E3" xml:space="preserve"></e3></e1> 
_CANON

}