ext/HTML-Entities/t/entities.t

use v6-alpha;
use Test;

plan 11;

use HTML::Entities; pass "(dummy instead of broken use_ok)";

my $a = "V&aring;re norske tegn b&oslash;r &#230;res";
$a = decode_entities($a);
is $a, "Våre norske tegn bør æres", 'Decoding entities should work';

$a = encode_entities($a);

is $a, "V&aring;re norske tegn b&oslash;r &aelig;res",
    '... and encoding entities should work';

$a = decode_entities($a);
$a = encode_entities_numeric($a);

is $a, "V&#xE5;re norske tegn b&#xF8;r &#xE6;res", 
    '... and encode_entities_numeric should also work.';

$a = "<&>";
is encode_entities($a), "&lt;&amp;&gt;", 
    'We should be able to encode basic HTML entities';
$a = "<&>";
is encode_entities_numeric($a), "&#x3C;&#x26;&#x3E;",
    '... or encode them numerically, if desired.';

$a = "V&aring;re norske tegn b&oslash;r &#230;res";
decode_entities($a);

is($a, "Våre norske tegn bør æres",
    'Decoding entities should work in void context');

encode_entities($a);
is($a, "V&aring;re norske tegn b&oslash;r &aelig;res",
    '... and encoding entities should also work in void context');

$a = "abcdef";
is encode_entities($a, 'a-c'), "&#97;&#98;&#99;def",
    'We should be able to include the range of characters to encode.';

my $b = "&lt;&amp;&gt;";
is(decode_entities([$a, $b]), [ 'abcdef', '<&>' ], "Decoding an array ref should work.");
is(decode_entities($a, $b), [ 'abcdef', '<&>' ], "Decoding a list should work too.");

=head

# See how well it does against rfc1866...
$ent = $plain = "";
while (<DATA>) {
    next unless /^\s*<!ENTITY\s+(\w+)\s*CDATA\s*\"&\#(\d+)/;
    $ent .= "&$0;";
    $plain .= chr($1);
}
print ">>>>$ent\n>>>>$plain\n";

$a = $ent;
decode_entities($a);
print "DDD>$a\n";
print "not " if $a ne $plain;
print "ok 7\n";

# Try decoding when the ";" are left out
$a = $ent,
$a =~ s/;//g;
decode_entities($a);
print ";;;>$a\n";
print "not " if $a ne $plain;
print "ok 8\n";


$a = $plain;
encode_entities($a);
print "EEE>$a\n";
print "not " if $a ne $ent;
print "ok 9\n";


# From: Bill Simpson-Young <bill.simpson-young@cmis.csiro.au>
# Subject: HTML entities problem with 5.11
# To: libwww-perl@ics.uci.edu
# Date: Fri, 05 Sep 1997 16:56:55 +1000
# Message-Id: <199709050657.QAA10089@snowy.nsw.cmis.CSIRO.AU>
#
# Hi. I've got a problem that has surfaced with the changes to 
# HTML::Entities.pm for 5.11 (it doesn't happen with 5.08).  It's happening 
# in the process of encoding then decoding special entities.  Eg, what goes 
# in as "abc&def&ghi" comes out as "abc&def;&ghi;".

print "not " unless decode_entities("abc&def&ghi&abc;&def;") eq
                                    "abc&def&ghi&abc;&def;";
print "ok 10\n";

# Decoding of &apos;
print "not " unless decode_entities("&apos;") eq "'" &&
                    encode_entities("'", "'") eq "&#39;";
print "ok 11\n";

# Quoted from rfc1866.txt

14. Proposed Entities

   The HTML DTD references the "Added Latin 1" entity set, which only
   supplies named entities for a subset of the non-ASCII characters in
   [ISO-8859-1], namely the accented characters. The following entities
   should be supported so that all ISO 8859-1 characters may only be
   referenced symbolically. The names for these entities are taken from
   the appendixes of [SGML].

    <!ENTITY nbsp   CDATA "&#160;" -- no-break space -->
    <!ENTITY iexcl  CDATA "&#161;" -- inverted exclamation mark -->
    <!ENTITY cent   CDATA "&#162;" -- cent sign -->
    <!ENTITY pound  CDATA "&#163;" -- pound sterling sign -->
    <!ENTITY curren CDATA "&#164;" -- general currency sign -->
    <!ENTITY yen    CDATA "&#165;" -- yen sign -->
    <!ENTITY brvbar CDATA "&#166;" -- broken (vertical) bar -->
    <!ENTITY sect   CDATA "&#167;" -- section sign -->
    <!ENTITY uml    CDATA "&#168;" -- umlaut (dieresis) -->
    <!ENTITY copy   CDATA "&#169;" -- copyright sign -->
    <!ENTITY ordf   CDATA "&#170;" -- ordinal indicator, feminine -->
    <!ENTITY laquo  CDATA "&#171;" -- angle quotation mark, left -->
    <!ENTITY not    CDATA "&#172;" -- not sign -->
    <!ENTITY shy    CDATA "&#173;" -- soft hyphen -->
    <!ENTITY reg    CDATA "&#174;" -- registered sign -->
    <!ENTITY macr   CDATA "&#175;" -- macron -->
    <!ENTITY deg    CDATA "&#176;" -- degree sign -->
    <!ENTITY plusmn CDATA "&#177;" -- plus-or-minus sign -->
    <!ENTITY sup2   CDATA "&#178;" -- superscript two -->
    <!ENTITY sup3   CDATA "&#179;" -- superscript three -->
    <!ENTITY acute  CDATA "&#180;" -- acute accent -->
    <!ENTITY micro  CDATA "&#181;" -- micro sign -->
    <!ENTITY para   CDATA "&#182;" -- pilcrow (paragraph sign) -->
    <!ENTITY middot CDATA "&#183;" -- middle dot -->
    <!ENTITY cedil  CDATA "&#184;" -- cedilla -->
    <!ENTITY sup1   CDATA "&#185;" -- superscript one -->
    <!ENTITY ordm   CDATA "&#186;" -- ordinal indicator, masculine -->
    <!ENTITY raquo  CDATA "&#187;" -- angle quotation mark, right -->
    <!ENTITY frac14 CDATA "&#188;" -- fraction one-quarter -->
    <!ENTITY frac12 CDATA "&#189;" -- fraction one-half -->
    <!ENTITY frac34 CDATA "&#190;" -- fraction three-quarters -->
    <!ENTITY iquest CDATA "&#191;" -- inverted question mark -->
    <!ENTITY Agrave CDATA "&#192;" -- capital A, grave accent -->
    <!ENTITY Aacute CDATA "&#193;" -- capital A, acute accent -->
    <!ENTITY Acirc  CDATA "&#194;" -- capital A, circumflex accent -->



Berners-Lee & Connolly      Standards Track                    [Page 75]

RFC 1866            Hypertext Markup Language - 2.0        November 1995


    <!ENTITY Atilde CDATA "&#195;" -- capital A, tilde -->
    <!ENTITY Auml   CDATA "&#196;" -- capital A, dieresis or umlaut mark -->
    <!ENTITY Aring  CDATA "&#197;" -- capital A, ring -->
    <!ENTITY AElig  CDATA "&#198;" -- capital AE diphthong (ligature) -->
    <!ENTITY Ccedil CDATA "&#199;" -- capital C, cedilla -->
    <!ENTITY Egrave CDATA "&#200;" -- capital E, grave accent -->
    <!ENTITY Eacute CDATA "&#201;" -- capital E, acute accent -->
    <!ENTITY Ecirc  CDATA "&#202;" -- capital E, circumflex accent -->
    <!ENTITY Euml   CDATA "&#203;" -- capital E, dieresis or umlaut mark -->
    <!ENTITY Igrave CDATA "&#204;" -- capital I, grave accent -->
    <!ENTITY Iacute CDATA "&#205;" -- capital I, acute accent -->
    <!ENTITY Icirc  CDATA "&#206;" -- capital I, circumflex accent -->
    <!ENTITY Iuml   CDATA "&#207;" -- capital I, dieresis or umlaut mark -->
    <!ENTITY ETH    CDATA "&#208;" -- capital Eth, Icelandic -->
    <!ENTITY Ntilde CDATA "&#209;" -- capital N, tilde -->
    <!ENTITY Ograve CDATA "&#210;" -- capital O, grave accent -->
    <!ENTITY Oacute CDATA "&#211;" -- capital O, acute accent -->
    <!ENTITY Ocirc  CDATA "&#212;" -- capital O, circumflex accent -->
    <!ENTITY Otilde CDATA "&#213;" -- capital O, tilde -->
    <!ENTITY Ouml   CDATA "&#214;" -- capital O, dieresis or umlaut mark -->
    <!ENTITY times  CDATA "&#215;" -- multiply sign -->
    <!ENTITY Oslash CDATA "&#216;" -- capital O, slash -->
    <!ENTITY Ugrave CDATA "&#217;" -- capital U, grave accent -->
    <!ENTITY Uacute CDATA "&#218;" -- capital U, acute accent -->
    <!ENTITY Ucirc  CDATA "&#219;" -- capital U, circumflex accent -->
    <!ENTITY Uuml   CDATA "&#220;" -- capital U, dieresis or umlaut mark -->
    <!ENTITY Yacute CDATA "&#221;" -- capital Y, acute accent -->
    <!ENTITY THORN  CDATA "&#222;" -- capital THORN, Icelandic -->
    <!ENTITY szlig  CDATA "&#223;" -- small sharp s, German (sz ligature) -->
    <!ENTITY agrave CDATA "&#224;" -- small a, grave accent -->
    <!ENTITY aacute CDATA "&#225;" -- small a, acute accent -->
    <!ENTITY acirc  CDATA "&#226;" -- small a, circumflex accent -->
    <!ENTITY atilde CDATA "&#227;" -- small a, tilde -->
    <!ENTITY auml   CDATA "&#228;" -- small a, dieresis or umlaut mark -->
    <!ENTITY aring  CDATA "&#229;" -- small a, ring -->
    <!ENTITY aelig  CDATA "&#230;" -- small ae diphthong (ligature) -->
    <!ENTITY ccedil CDATA "&#231;" -- small c, cedilla -->
    <!ENTITY egrave CDATA "&#232;" -- small e, grave accent -->
    <!ENTITY eacute CDATA "&#233;" -- small e, acute accent -->
    <!ENTITY ecirc  CDATA "&#234;" -- small e, circumflex accent -->
    <!ENTITY euml   CDATA "&#235;" -- small e, dieresis or umlaut mark -->
    <!ENTITY igrave CDATA "&#236;" -- small i, grave accent -->
    <!ENTITY iacute CDATA "&#237;" -- small i, acute accent -->
    <!ENTITY icirc  CDATA "&#238;" -- small i, circumflex accent -->
    <!ENTITY iuml   CDATA "&#239;" -- small i, dieresis or umlaut mark -->
    <!ENTITY eth    CDATA "&#240;" -- small eth, Icelandic -->
    <!ENTITY ntilde CDATA "&#241;" -- small n, tilde -->
    <!ENTITY ograve CDATA "&#242;" -- small o, grave accent -->



Berners-Lee & Connolly      Standards Track                    [Page 76]

RFC 1866            Hypertext Markup Language - 2.0        November 1995


    <!ENTITY oacute CDATA "&#243;" -- small o, acute accent -->
    <!ENTITY ocirc  CDATA "&#244;" -- small o, circumflex accent -->
    <!ENTITY otilde CDATA "&#245;" -- small o, tilde -->
    <!ENTITY ouml   CDATA "&#246;" -- small o, dieresis or umlaut mark -->
    <!ENTITY divide CDATA "&#247;" -- divide sign -->
    <!ENTITY oslash CDATA "&#248;" -- small o, slash -->
    <!ENTITY ugrave CDATA "&#249;" -- small u, grave accent -->
    <!ENTITY uacute CDATA "&#250;" -- small u, acute accent -->
    <!ENTITY ucirc  CDATA "&#251;" -- small u, circumflex accent -->
    <!ENTITY uuml   CDATA "&#252;" -- small u, dieresis or umlaut mark -->
    <!ENTITY yacute CDATA "&#253;" -- small y, acute accent -->
    <!ENTITY thorn  CDATA "&#254;" -- small thorn, Icelandic -->
    <!ENTITY yuml   CDATA "&#255;" -- small y, dieresis or umlaut mark -->

=cut
	Global
`s`	Focus search bar
`?`	Bring up this help dialog
	GitHub
`g` `p`	Go to pull requests
`g` `i`	go to github issues (only if github is preferred repository)
	POD
`g` `a`	Go to author
`g` `c`	Go to changes
`g` `i`	Go to issues
`g` `d`	Go to dist
`g` `r`	Go to repository/SCM
`g` `s`	Go to source
`g` `b`	Go to file browse
	Search terms
module: (e.g. module:Plugin)
distribution: (e.g. distribution:Dancer auth)
author: (e.g. author:SONGMU Redis)
version: (e.g. version:1.00)