package HTML::BBCode;
=head1 NAME
HTML::BBCode - Perl extension for converting BBcode to HTML.
=head1 SYNOPSIS
use HTML::BBCode;
my $bbc = HTML::BBCode->new( \%options );
my $html = $bbc->parse($bbcode);
# Input
print $bbc->{bbcode};
# Output
print $bbc->{html};
=head1 DESCRIPTION
C<HTML::BBCode> converts BBCode -as used on the phpBB bulletin
boards- to its HTML equivalent.
Please note that, although this was the first BBCode module, it's by
far not the best nor fastest. It's also not heavilly maintained, so
you might want to look at L<BBCode::Parser> and L<Parse::BBCode>.
=head2 METHODS
The following methods can be used
=head3 new
my $bbc = HTML::BBCode->new({
allowed_tags => [ @bbcode_tags ],
stripscripts => 1,
linebreaks => 1,
});
C<new> creates a new C<HTML::BBCode> object using the configuration
passed to it. The object's default configuration allows all BBCode to
be converted to the default HTML.
=head4 options
=over 5
=item allowed_tags
Defaults to all currently know C<BBCode tags>, being:
b, u, i, color, size, quote, code, list, url, email, img. With this
option, you can specify what BBCode tags you would like to convert.
=item stripscripts
Enabled by default, this option will remove all the XSS trickery (and
thus is probably best not to turn it off).
=item no_html
This option has been removed since version 2.0
=item no_jslink
This option has been removed since version 2.0
=item linebreaks
Disabled by default.
When true, will substitute linebreaks into HTML ('<br />')
=back
=head3 parse
my $html = $bbc->parse($bbcode);
Parses text supplied as a single scalar string and returns the HTML as
a single scalar string.
=head1 CAVEAT: API CHANGES
Please do note that the C<html_tags>, C<no_html>, C<no_jslink> options in
the new method have been removed since version 2.0 due to the XSS protection
(provided by L<HTML::StripScripts::Parser>). This will most likely
break your current scripts (if you used the C<html_tags> option).
=head1 SEE ALSO
=over 4
=item * L<http://www.phpbb.com/phpBB/faq.php?mode=bbcode>
=item * L<http://menno.b10m.net/perl/>
=item * L<HTML::StripScripts::Parser>, L<HTML::BBReverse>, L<BBCode::Parser>
=back
=head1 BUGS
C<Bugs? Impossible!>. Please report bugs to L<http://rt.cpan.org/Ticket/Create.html?Queue=HTML-BBCode>.
=head1 AUTHOR
Menno Blom, E<lt>blom@cpan.orgE<gt>
=head1 COPYRIGHT AND LICENSE
Copyright (C) 2004-2009 by Menno Blom
This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself.
=cut
#------------------------------------------------------------------------------#
use strict;
use warnings;
use HTML::BBCode::StripScripts;
our $VERSION = '2.07';
our @bbcode_tags = qw(code quote b u i color size list url email img);
sub new {
my ($class, $args) = @_;
$args ||= {};
$class->_croak("Options must be a hash reference")
if ref($args) ne 'HASH';
my $self = {};
bless $self, $class;
$self->_init($args) or return undef;
return $self;
}
sub _init {
my ($self, $args) = @_;
my %html_tags = (
code => '<div class="bbcode_code_header">Code:</div>'.
'<div class="bbcode_code_body">%s</div>',
quote => '<div class="bbcode_quote_header">%s</div>'.
'<div class="bbcode_quote_body">%s</div>',
b => '<span style="font-weight:bold">%s</span>',
u => '<span style="text-decoration:underline;">%s</span>',
i => '<span style="font-style:italic">%s</span>',
color => '<span style="color:%s">%s</span>',
size => '<span style="font-size:%spx">%s</span>',
url => '<a href="%s">%s</a>',
email => '<a href="mailto:%s">%s</a>',
img => '<img src="%s" alt="" />',
ul => '<ul>%s</ul>',
ol_number => '<ol>%s</ol>',
ol_alpha => '<ol style="list-style-type:lower-alpha;">%s</ol>',
);
my %options = (
allowed_tags=> \@bbcode_tags,
html_tags => \%html_tags,
stripscripts => 1,
linebreaks => 0,
%{ $args },
);
$self->{options} = \%options;
$self->{'hss'} = HTML::BBCode::StripScripts->new({
Context => 'Flow',
AllowSrc => 1,
AllowMailto => 1,
AllowHref => 1,
AllowRelURL => 1,
EscapeFiltered => 1,
BanAllBut => [qr/a div img li ol span ul/],
Rules => {
br => 1,
img => {
required => ['src'],
'src' => 1,
'alt' => 1,
'*' => 0,
},
a => {
required => ['href'],
'href' => 1,
'*' => 0,
},
img => {
'src' => 1,
'alt' => 1,
'*' => 0,
},
div => {
class => qr{^bbcode_},
'*' => 0,
},
span => {
style => \&_filter_style,
'*' => 0,
},
ol => {
style => qr/^list-style-type:lower-alpha$/,
'*' => 0,
},
ul => 1,
li => 1,
}
});
return $self;
}
# Parse the input!
sub parse {
my ($self, $bbcode) = @_;
return if(!defined $bbcode);
$self->{_stack} = [];
$self->{_in_code_block} = 0;
$self->{_skip_nest} = '';
$self->{_nest_count} = 0;
$self->{_nest_count_stack} = 0;
$self->{_dont_nest} = ['code', 'url', 'email', 'img'];
$self->{bbcode} = '';
$self->{html} = '';
$self->{bbcode} = $bbcode;
my $input = $bbcode;
main:
while(1) {
# End tag
if($input =~ /^(\[\/[^\]]+\])/s) {
my $end = lc $1;
if(($self->{_skip_nest} ne '' && $end ne "[/$self->{_skip_nest}]") ||
($self->{_in_code_block} && $end ne "[/code]")) {
_content($self, $end);
} else {
_end_tag($self, $end);
}
$input = $';
}
# Opening tag
elsif($input =~ /^(\[[^\]]+\])/s ) {
if($self->{_in_code_block}) {
_content($self, $1);
} else {
_open_tag($self, $1);
}
$input = $';
}
# None BBCode content till next tag
elsif($input =~ /^([^\[]+)/s) {
_content($self, $1);
$input = $';
}
# BUG #14138 unmatched bracket, content till end of input
elsif($input =~ /^(.+)$/s) {
_content($self, $1);
$input = $';
}
# Now what?
else {
last main if(!$input); # We're at the end now, stop parsing!
}
}
$self->{html} = join('', @{$self->{_stack}});
return $self->{options}->{stripscripts} ? $self->_stripscripts() : $self->{html};
}
sub _open_tag {
my ($self, $open) = @_;
my ($tag, $rest) = $open =~ m/\[([^=\]]+)(.*)?\]/s; # Don't do this! ARGH!
$tag = lc $tag;
if(_dont_nest($self, $tag) && $tag eq 'img') {
$self->{_skip_nest} = $tag;
}
if($self->{_skip_nest} eq $tag) {
$self->{_nest_count}++;
$self->{_nest_count_stack}++;
}
$self->{_in_code_block}++ if($tag eq 'code');
push @{$self->{_stack}}, '['.$tag.$rest.']';
}
sub _content {
my ($self, $content) = @_;
$content =~ s|\r*||gs;
$content =~ s|\n|<br />\n|gs if($self->{options}->{linebreaks} &&
$self->{_in_code_block} == 0);
push @{$self->{_stack}}, $content;
}
sub _end_tag {
my ($self, $end) = @_;
my ($tag, $arg);
my @buf = ( $end );
if("[/$self->{_skip_nest}]" eq $end && $self->{_nest_count} > 1) {
push @{$self->{_stack}}, $end;
$self->{_nest_count}--;
return;
}
$self->{_in_code_block} = 0 if($end eq '[/code]');
# Loop through the stack
while(1) {
my $item = pop(@{$self->{_stack}});
push @buf, $item;
if(!defined $item) {
map { push @{$self->{_stack}}, $_ if($_) } reverse @buf;
last;
}
if("[$self->{_skip_nest}]" eq "$item") {
$self->{_nest_count_stack}--;
next if($self->{_nest_count_stack} > 0);
}
$self->{_nest_count}--
if("[/$self->{_skip_nest}]" eq $end && $self->{_nest_count} > 0) ;
if($item =~ /\[([^=\]]+).*\]/s) {
$tag = $1;
if ($tag && $end eq "[/$tag]") {
push @{$self->{_stack}}, (_is_allowed($self, $tag))
? _do_BB($self, @buf)
: reverse @buf;
# Clear the _skip_nest?
$self->{_skip_nest} = '' if(defined $self->{_skip_nest} &&
$tag eq $self->{_skip_nest});
last;
}
}
}
$self->{_nest_count_stack} = 0;
}
sub _do_BB {
my ($self, @buf) = @_;
my ($tag, $attr);
my $html;
# Get the opening tag
my $open = pop(@buf);
# We prefer to read in non-reverse way
@buf = reverse @buf;
# Closing tag is kinda useless, pop it
pop(@buf);
# Rest should be content;
my $content = join(' ', @buf);
# What are we dealing with anyway? Any attributes maybe?
if($open =~ /\[([^=\]]+)=?([^\]]+)?]/) {
$tag = $1;
$attr = $2;
}
# Kludgy way to handle specific BBCodes ...
if($tag eq 'quote') {
$html = sprintf($self->{options}->{html_tags}->{quote},
($attr) ? "$attr wrote:"
: "Quote:",
$content
);
} elsif($tag eq 'code') {
$html = sprintf($self->{options}->{html_tags}->{code}, _code($content));
} elsif($tag eq 'list') {
$html = _list($self, $attr, $content);
} elsif(($tag eq 'email' || $tag eq 'url') && !$attr) {
$html = sprintf($self->{options}->{html_tags}->{$tag}, $content,$content);
} elsif ($attr) {
$html = sprintf($self->{options}->{html_tags}->{$tag}, $attr, $content);
} else {
$html = sprintf($self->{options}->{html_tags}->{$tag}, $content);
}
# Return ...
return $html;
}
sub _is_allowed {
my ($self, $check) = @_;
map {
return 1 if ($_ eq $check);
} @{$self->{options}->{allowed_tags}};
return 0;
}
sub _dont_nest {
my ($self, $check) = @_;
map {
return 1 if($_ eq $check);
} @{$self->{_dont_nest}};
return 0;
}
sub _code {
my $code = shift;
$code =~ s|^\s+?[\n\r]+?||;
$code =~ s|<|\<|g;
$code =~ s|>|\>|g;
$code =~ s|\[|\[|g;
$code =~ s|\]|\]|g;
$code =~ s| |\ |g;
$code =~ s|\n|<br />|g;
return $code;
}
sub _list {
my ($self, $attr, $content) = @_;
$content =~ s|^<br />[\s\r\n]*|\n|s;
$content =~ s|\[\*\]([^\[]+)|_list_removelastbr($1)|egs;
$content =~ s|<br />$|\n|s;
if($attr) {
return sprintf($self->{options}->{html_tags}->{ol_number}, $content)
if($attr =~ /^\d/);
return sprintf($self->{options}->{html_tags}->{ol_alpha}, $content)
if($attr =~ /^\D/);
} else {
return sprintf($self->{options}->{html_tags}->{ul}, $content);
}
}
sub _list_removelastbr {
my $content = shift;
$content =~ s|<br />[\s\r\n]*$||;
$content =~ s|^\s*||;
$content =~ s|\s*$||;
return "<li>$content</li>\n";
}
sub _stripscripts {
my $self = shift;
$self->{'html'} = $self->{'hss'}->filter_html($self->{'html'});
return $self->{'html'};
}
sub _filter_style {
my ($filter, $tag, $attr_name, $attr_val) = @_;
if ($attr_val eq 'font-weight:bold'
or $attr_val eq 'text-decoration:underline'
or $attr_val eq 'font-style:italic'
or $attr_val eq 'list-style-type') {
return $attr_val;
}
if ( my ($color) = $attr_val =~ /^color:(.*)/ ) {
my @html_color = qw/
black gray maroon red green lime olive yellow
navy blue purple fuchsia teal aqua silver white
/;
return $attr_val if $color =~ /^#[a-fA-F\d]{6}$/;
return $attr_val if $color =~ /^#[a-fA-F\d]{3}$/;
return $attr_val if grep { $color eq $_ } @html_color;
return undef;
}
if ( $attr_val =~ /font-size:\d+px/ ) {
return $attr_val;
}
return undef;
}
sub _croak {
my ($class, @error) = @_;
require Carp;
Carp::croak(@error);
}
1;