use v6-alpha;
use Test;
plan 11;
use HTML::Entities; pass "(dummy instead of broken use_ok)";
# Exercise the decode/encode helpers exported by HTML::Entities.
#
# Every literal that carries an entity is single-quoted so that it reaches the
# function under test verbatim (no interpolation of '&', '#' or ';').  The
# plain-text expectations keep the real characters.

# --- Value-returning forms -------------------------------------------------

# Named entities decode to the characters they stand for.
my $a = 'V&aring;re norske tegn b&oslash;r &aelig;res';
$a = decode_entities($a);
is $a, "Våre norske tegn bør æres", 'Decoding entities should work';

# Encoding the decoded text must bring the named entities back.
$a = encode_entities($a);
is $a, 'V&aring;re norske tegn b&oslash;r &aelig;res',
    '... and encoding entities should work';

# NOTE(review): the hexadecimal "&#xHH;" form matches what Perl 5's
# HTML::Entities documents for encode_entities_numeric -- confirm that this
# port emits the same form.
$a = decode_entities($a);
$a = encode_entities_numeric($a);
is $a, 'V&#xE5;re norske tegn b&#xF8;r &#xE6;res',
    '... and encode_entities_numeric should also work.';

# The three markup-significant characters must always be escaped.
$a = '<&>';
is encode_entities($a), '&lt;&amp;&gt;',
    'We should be able to encode basic HTML entities';

$a = '<&>';
is encode_entities_numeric($a), '&#x3C;&#x26;&#x3E;',
    '... or encode them numerically, if desired.';

# --- In-place (void context) forms -----------------------------------------

# Called without using the return value, the functions are expected to
# modify their argument directly.
$a = 'V&aring;re norske tegn b&oslash;r &aelig;res';
decode_entities($a);
is($a, "Våre norske tegn bør æres",
    'Decoding entities should work in void context');

encode_entities($a);
is($a, 'V&aring;re norske tegn b&oslash;r &aelig;res',
    '... and encoding entities should also work in void context');

# --- Explicit character range ----------------------------------------------

# Only the characters in the supplied range are encoded; 'a'..'c' have no
# named entity, so numeric references are expected.
# NOTE(review): decimal "&#NN;" is what Perl 5's HTML::Entities yields for
# unnamed characters below 256 -- confirm against this port.
$a = "abcdef";
is encode_entities($a, 'a-c'), '&#97;&#98;&#99;def',
    'We should be able to include the range of characters to encode.';

# --- Multiple inputs -------------------------------------------------------

# $a is still "abcdef" here (the call above did not run in void context), so
# only $b actually contains something to decode.
my $b = '&lt;&amp;&gt;';
is(decode_entities([$a, $b]), [ 'abcdef', '<&>' ], "Decoding an array ref should work.");
is(decode_entities($a, $b), [ 'abcdef', '<&>' ], "Decoding a list should work too.");
=head
# See how well it does against rfc1866...
$ent = $plain = "";
while (<DATA>) {
next unless /^\s*<!ENTITY\s+(\w+)\s*CDATA\s*\"&\#(\d+)/;
$ent .= "&$0;";
$plain .= chr($1);
}
print ">>>>$ent\n>>>>$plain\n";
$a = $ent;
decode_entities($a);
print "DDD>$a\n";
print "not " if $a ne $plain;
print "ok 7\n";
# Try decoding when the ";" are left out
$a = $ent,
$a =~ s/;//g;
decode_entities($a);
print ";;;>$a\n";
print "not " if $a ne $plain;
print "ok 8\n";
$a = $plain;
encode_entities($a);
print "EEE>$a\n";
print "not " if $a ne $ent;
print "ok 9\n";
# From: Bill Simpson-Young <bill.simpson-young@cmis.csiro.au>
# Subject: HTML entities problem with 5.11
# To: libwww-perl@ics.uci.edu
# Date: Fri, 05 Sep 1997 16:56:55 +1000
# Message-Id: <199709050657.QAA10089@snowy.nsw.cmis.CSIRO.AU>
#
# Hi. I've got a problem that has surfaced with the changes to
# HTML::Entities.pm for 5.11 (it doesn't happen with 5.08). It's happening
# in the process of encoding then decoding special entities. Eg, what goes
# in as "abc&def&ghi" comes out as "abc&def;&ghi;".
print "not " unless decode_entities("abc&def&ghi&abc;&def;") eq
"abc&def&ghi&abc;&def;";
print "ok 10\n";
# Decoding of &apos;
print "not " unless decode_entities("&apos;") eq "'" &&
encode_entities("'", "'") eq "&#39;";
print "ok 11\n";
# Quoted from rfc1866.txt
14. Proposed Entities
The HTML DTD references the "Added Latin 1" entity set, which only
supplies named entities for a subset of the non-ASCII characters in
[ISO-8859-1], namely the accented characters. The following entities
should be supported so that all ISO 8859-1 characters may only be
referenced symbolically. The names for these entities are taken from
the appendixes of [SGML].
<!ENTITY nbsp CDATA "&#160;" -- no-break space -->
<!ENTITY iexcl CDATA "&#161;" -- inverted exclamation mark -->
<!ENTITY cent CDATA "&#162;" -- cent sign -->
<!ENTITY pound CDATA "&#163;" -- pound sterling sign -->
<!ENTITY curren CDATA "&#164;" -- general currency sign -->
<!ENTITY yen CDATA "&#165;" -- yen sign -->
<!ENTITY brvbar CDATA "&#166;" -- broken (vertical) bar -->
<!ENTITY sect CDATA "&#167;" -- section sign -->
<!ENTITY uml CDATA "&#168;" -- umlaut (dieresis) -->
<!ENTITY copy CDATA "&#169;" -- copyright sign -->
<!ENTITY ordf CDATA "&#170;" -- ordinal indicator, feminine -->
<!ENTITY laquo CDATA "&#171;" -- angle quotation mark, left -->
<!ENTITY not CDATA "&#172;" -- not sign -->
<!ENTITY shy CDATA "&#173;" -- soft hyphen -->
<!ENTITY reg CDATA "&#174;" -- registered sign -->
<!ENTITY macr CDATA "&#175;" -- macron -->
<!ENTITY deg CDATA "&#176;" -- degree sign -->
<!ENTITY plusmn CDATA "&#177;" -- plus-or-minus sign -->
<!ENTITY sup2 CDATA "&#178;" -- superscript two -->
<!ENTITY sup3 CDATA "&#179;" -- superscript three -->
<!ENTITY acute CDATA "&#180;" -- acute accent -->
<!ENTITY micro CDATA "&#181;" -- micro sign -->
<!ENTITY para CDATA "&#182;" -- pilcrow (paragraph sign) -->
<!ENTITY middot CDATA "&#183;" -- middle dot -->
<!ENTITY cedil CDATA "&#184;" -- cedilla -->
<!ENTITY sup1 CDATA "&#185;" -- superscript one -->
<!ENTITY ordm CDATA "&#186;" -- ordinal indicator, masculine -->
<!ENTITY raquo CDATA "&#187;" -- angle quotation mark, right -->
<!ENTITY frac14 CDATA "&#188;" -- fraction one-quarter -->
<!ENTITY frac12 CDATA "&#189;" -- fraction one-half -->
<!ENTITY frac34 CDATA "&#190;" -- fraction three-quarters -->
<!ENTITY iquest CDATA "&#191;" -- inverted question mark -->
<!ENTITY Agrave CDATA "&#192;" -- capital A, grave accent -->
<!ENTITY Aacute CDATA "&#193;" -- capital A, acute accent -->
<!ENTITY Acirc CDATA "&#194;" -- capital A, circumflex accent -->
Berners-Lee & Connolly Standards Track [Page 75]
RFC 1866 Hypertext Markup Language - 2.0 November 1995
<!ENTITY Atilde CDATA "&#195;" -- capital A, tilde -->
<!ENTITY Auml CDATA "&#196;" -- capital A, dieresis or umlaut mark -->
<!ENTITY Aring CDATA "&#197;" -- capital A, ring -->
<!ENTITY AElig CDATA "&#198;" -- capital AE diphthong (ligature) -->
<!ENTITY Ccedil CDATA "&#199;" -- capital C, cedilla -->
<!ENTITY Egrave CDATA "&#200;" -- capital E, grave accent -->
<!ENTITY Eacute CDATA "&#201;" -- capital E, acute accent -->
<!ENTITY Ecirc CDATA "&#202;" -- capital E, circumflex accent -->
<!ENTITY Euml CDATA "&#203;" -- capital E, dieresis or umlaut mark -->
<!ENTITY Igrave CDATA "&#204;" -- capital I, grave accent -->
<!ENTITY Iacute CDATA "&#205;" -- capital I, acute accent -->
<!ENTITY Icirc CDATA "&#206;" -- capital I, circumflex accent -->
<!ENTITY Iuml CDATA "&#207;" -- capital I, dieresis or umlaut mark -->
<!ENTITY ETH CDATA "&#208;" -- capital Eth, Icelandic -->
<!ENTITY Ntilde CDATA "&#209;" -- capital N, tilde -->
<!ENTITY Ograve CDATA "&#210;" -- capital O, grave accent -->
<!ENTITY Oacute CDATA "&#211;" -- capital O, acute accent -->
<!ENTITY Ocirc CDATA "&#212;" -- capital O, circumflex accent -->
<!ENTITY Otilde CDATA "&#213;" -- capital O, tilde -->
<!ENTITY Ouml CDATA "&#214;" -- capital O, dieresis or umlaut mark -->
<!ENTITY times CDATA "&#215;" -- multiply sign -->
<!ENTITY Oslash CDATA "&#216;" -- capital O, slash -->
<!ENTITY Ugrave CDATA "&#217;" -- capital U, grave accent -->
<!ENTITY Uacute CDATA "&#218;" -- capital U, acute accent -->
<!ENTITY Ucirc CDATA "&#219;" -- capital U, circumflex accent -->
<!ENTITY Uuml CDATA "&#220;" -- capital U, dieresis or umlaut mark -->
<!ENTITY Yacute CDATA "&#221;" -- capital Y, acute accent -->
<!ENTITY THORN CDATA "&#222;" -- capital THORN, Icelandic -->
<!ENTITY szlig CDATA "&#223;" -- small sharp s, German (sz ligature) -->
<!ENTITY agrave CDATA "&#224;" -- small a, grave accent -->
<!ENTITY aacute CDATA "&#225;" -- small a, acute accent -->
<!ENTITY acirc CDATA "&#226;" -- small a, circumflex accent -->
<!ENTITY atilde CDATA "&#227;" -- small a, tilde -->
<!ENTITY auml CDATA "&#228;" -- small a, dieresis or umlaut mark -->
<!ENTITY aring CDATA "&#229;" -- small a, ring -->
<!ENTITY aelig CDATA "&#230;" -- small ae diphthong (ligature) -->
<!ENTITY ccedil CDATA "&#231;" -- small c, cedilla -->
<!ENTITY egrave CDATA "&#232;" -- small e, grave accent -->
<!ENTITY eacute CDATA "&#233;" -- small e, acute accent -->
<!ENTITY ecirc CDATA "&#234;" -- small e, circumflex accent -->
<!ENTITY euml CDATA "&#235;" -- small e, dieresis or umlaut mark -->
<!ENTITY igrave CDATA "&#236;" -- small i, grave accent -->
<!ENTITY iacute CDATA "&#237;" -- small i, acute accent -->
<!ENTITY icirc CDATA "&#238;" -- small i, circumflex accent -->
<!ENTITY iuml CDATA "&#239;" -- small i, dieresis or umlaut mark -->
<!ENTITY eth CDATA "&#240;" -- small eth, Icelandic -->
<!ENTITY ntilde CDATA "&#241;" -- small n, tilde -->
<!ENTITY ograve CDATA "&#242;" -- small o, grave accent -->
Berners-Lee & Connolly Standards Track [Page 76]
RFC 1866 Hypertext Markup Language - 2.0 November 1995
<!ENTITY oacute CDATA "&#243;" -- small o, acute accent -->
<!ENTITY ocirc CDATA "&#244;" -- small o, circumflex accent -->
<!ENTITY otilde CDATA "&#245;" -- small o, tilde -->
<!ENTITY ouml CDATA "&#246;" -- small o, dieresis or umlaut mark -->
<!ENTITY divide CDATA "&#247;" -- divide sign -->
<!ENTITY oslash CDATA "&#248;" -- small o, slash -->
<!ENTITY ugrave CDATA "&#249;" -- small u, grave accent -->
<!ENTITY uacute CDATA "&#250;" -- small u, acute accent -->
<!ENTITY ucirc CDATA "&#251;" -- small u, circumflex accent -->
<!ENTITY uuml CDATA "&#252;" -- small u, dieresis or umlaut mark -->
<!ENTITY yacute CDATA "&#253;" -- small y, acute accent -->
<!ENTITY thorn CDATA "&#254;" -- small thorn, Icelandic -->
<!ENTITY yuml CDATA "&#255;" -- small y, dieresis or umlaut mark -->
=cut