#!/usr/bin/perl -w

# Enable strictures for the whole script; every variable below (and in the
# subs that follow) is lexically declared, so this only adds safety.
use strict;
use warnings;

# Attribute names (lower case) that are removed from every start tag.
my @ignore_attr = qw(
    bgcolor background color face style link alink vlink text
    onblur onchange onclick ondblclick onfocus onkeydown onkeyup onload
    onmousedown onmousemove onmouseout onmouseover onmouseup
    onreset onselect onunload
);

# Tag names handed to the parser's `ignore_tags` option.
my @ignore_tags = qw(font big small b i);

# Element names handed to the parser's `ignore_elements` option.
my @ignore_elements = qw(script style);

# Lookup table built from @ignore_attr so that sub tag can test an
# attribute name with a single hash access.
my %ignore_attr = map { $_ => 1 } @ignore_attr;
# Start-tag handler: prints the tag text after cutting out every attribute
# whose lower-cased name is a key of %ignore_attr.
#
#   $pos  - array ref of offsets/lengths into $text.  It is consumed from
#           the end, four entries at a time, each group being read as
#           (name offset, name length, value offset, value length).  A zero
#           value offset is treated as "attribute has no value".  The array
#           is emptied down to fewer than four entries as a side effect.
#   $text - the original source text of the tag.
#
# Attributes are visited last-to-first so that a removal never shifts the
# offsets of the attributes still to be examined.
sub tag {
    my ($pos, $text) = @_;

    my $removed = 0;    # set once at least one attribute has been cut
    my $cut_end;        # offset where the attribute after the current one begins

    while (@$pos >= 4) {
        my ($name_at, $name_len, $val_at, $val_len) = splice @$pos, -4;

        # For the last attribute of the tag the cut boundary is the end of
        # that attribute itself (end of value, or end of name if valueless).
        unless (defined $cut_end) {
            $cut_end = $val_at ? $val_at + $val_len : $name_at + $name_len;
        }

        my $name = lc substr($text, $name_at, $name_len);
        if ($ignore_attr{$name}) {
            # Drop everything from the attribute name up to the boundary.
            substr($text, $name_at, $cut_end - $name_at) = "";
            $removed = 1;
        }

        # Whatever precedes this attribute must stop where it starts.
        $cut_end = $name_at;
    }

    # If a removal left only whitespace between the tag name and '>',
    # squeeze it out.
    $text =~ s/^(<\w+)\s+>$/$1>/ if $removed;

    print $text;
}
# Declaration handler: prints the declaration text only when the first
# argument is the string "doctype"; any other declaration is dropped.
#
#   $kind   - declaration type as supplied by the caller
#   $markup - text to print for a doctype declaration
sub decl {
    my ($kind, $markup) = @_;
    print $markup if $kind eq "doctype";
}
# Default handler: writes its first argument to the currently selected
# output handle unchanged.
sub text {
    my ($chunk) = @_;
    print $chunk;
}
# HTML::Parser is used below but never loaded elsewhere in this file; load
# it here so that the constructor call does not die with
# "Can't locate object method".
require HTML::Parser;

# The input file is the first command-line argument.
my $file = shift;
die "Usage: $0 <html-file>\n" unless defined $file;

# Build the parser and run it over the file.
#   start_h        - start tags go through tag() for attribute stripping
#   process_h /
#   comment_h      - registered with an empty handler and empty argspec
#                    (NOTE(review): per the HTML::Parser docs an empty-string
#                    handler suppresses the event - confirm)
#   declaration_h  - declarations go through decl()
#   default_h      - everything else is echoed by text()
#   ignore_tags /
#   ignore_elements - the configured tag and element lists are handed to
#                    the parser
# A false return from parse_file is reported as a failure to open the file.
HTML::Parser->new(
    api_version     => 3,
    start_h         => [\&tag,  "tokenpos, text"],
    process_h       => ["", ""],
    comment_h       => ["", ""],
    declaration_h   => [\&decl, "tagname, text"],
    default_h       => [\&text, "text"],
    ignore_tags     => \@ignore_tags,
    ignore_elements => \@ignore_elements,
)->parse_file($file) or die "Can't open file: $!\n";