use strict;
use warnings;
use Test::More tests => 157;
# Load the module under test and build the sanitizer used by the checks
# that follow; it is constructed with the `notidy` option set.
require_ok('HTML::Laundry');
my $l1 = HTML::Laundry->new( { notidy => 1 } );

# Elements expected to yield nothing at all: the tag and its contents go.
ok( !$l1->clean('<script>alert("Jane Austen was here!");</script>'),
    '<script> is removed in its entirety' );
ok( !$l1->clean('<applet>blah blah</applet>'),
    '<applet> is removed in its entirety' );

# Tags that are dropped while any enclosed text is expected to survive.
# Each row is [ input, expected output, test description ].
for my $case (
    [
        '<heroine>No one who had ever seen Catherine Morland in her infancy...</heroine>',
        'No one who had ever seen Catherine Morland in her infancy...',
        'Unknown tag is stripped, but its contents remain',
    ],
    [ '<body>foo</body>', 'foo', '<body> tag is stripped' ],
    [ '<link />',         '',    '<link> tag is stripped' ],
    [ '<meta />',         '',    '<meta> tag is stripped' ],
    [ '<html>foo</html>', 'foo', '<html> tag is stripped' ],
  )
{
    my ( $markup, $expected, $label ) = @{$case};
    is( $l1->clean($markup), $expected, $label );
}

# Both PHP forms must clean to a false value. The second form is only
# cleaned when the first one already came back false.
ok(
    !( $l1->clean('<?php echo("Foo"); ?>') || $l1->clean('<?= $foo ?>') ),
    'PHP tags are stripped entirely'
);

# Remaining one-off cases, same row layout as above.
# NOTE(review): the ASP row expects output identical to its input even
# though the description says the tag is "transformed into literal text".
# Confirm the expected string was not meant to be entity-escaped.
for my $case (
    [
        '<%= "Hello World!" %>',
        '<%= "Hello World!" %>',
        'ASP tags are transformed into literal text',
    ],
    [
        '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">',
        '',
        'DOCTYPE declaration is stripped',
    ],
    [
        '<p class="xyzzy" plugh="plover">Her situation in life, the character of her father and mother, her own person and disposition, were all equally against her.</p>',
        '<p class="xyzzy">Her situation in life, the character of her father and mother, her own person and disposition, were all equally against her.</p>',
        'Unknown attribute is stripped, but known attribute remains',
    ],
  )
{
    my ( $markup, $expected, $label ) = @{$case};
    is( $l1->clean($markup), $expected, $label );
}
# Elements that are expected to pass through the sanitizer unchanged.
my @elements = qw(
    a abbr acronym address area b bdo big blockquote
    br button caption center cite code col colgroup dd
    del dfn dir div dl dt em fieldset font form
    h1 h2 h3 h4 h5 h6 hr i img input ins kbd
    label legend li map menu ol optgroup option p
    pre q s samp select small span strike strong
    sub sup table tbody td textarea tfoot th thead
    tr tt u ul var wbr
);

# Elements whose open/close pair is expected back in self-closing form.
my %is_empty = map { $_ => 1 } qw( area br col hr img input );

# One test per element: feed an empty open/close pair through the
# sanitizer and compare it with the form expected for that element.
for my $tag (@elements) {
    my $self_closing = $is_empty{$tag};
    my $expected     = $self_closing ? "<$tag />" : "<$tag></$tag>";
    my $label        = "element $tag is not sanitized"
      . ( $self_closing ? ' (empty tag)' : '' );
    is( $l1->clean("<$tag></$tag>"), $expected, $label );
}
# Attributes that are expected to survive sanitizing when set on a <p>.
my @attributes = qw(
    abbr accept accept-charset accesskey action align alt
    axis border cellpadding cellspacing char charoff charset
    checked cite class clear color cols colspan compact
    coords datetime dir disabled enctype for frame
    headers height href hreflang hspace id ismap label
    lang longdesc maxlength media method multiple name
    nohref noshade nowrap prompt readonly rel rev rows
    rowspan rules scope selected shape size span src
    start summary tabindex target title type usemap
    valign value vspace width xml:lang
);

# One test per attribute: the paragraph must come back exactly as given.
# The loop variable is deliberately not called $a: a lexical $a hides the
# package variable that sort() comparison blocks rely on.
for my $attr (@attributes) {
    my $markup = qq{<p $attr="frotz"></p>};
    is( $l1->clean($markup), $markup, "attribute $attr is not sanitized" );
}