TODO - metacpan.org

1. More documentation is needed.
2. Some interfaces may change in the future.

----------------------- cut of sources
	# Excerpt: store details of the last fetched url and its response on the bot.
	# NOTE(review): $res looks like an HTTP::Response object -- confirm against the real source.
	$self->{Url_Last} = $url;
	$self->{Url_Base} = $res->base->as_string;
	$self->{Url_Code} = $res->code;
	$self->{Url_Title} = $res->headers->title;
	$self->{Url_Content_Type} = $res->headers->content_type;
	$self->{Url_Last_Modified} = $res->headers->last_modified;
	# Same last_modified value, formatted by HTTP::Date::time2iso.
	$self->{Url_Last_Modified_Iso} = HTTP::Date::time2iso($res->headers->last_modified);
# Two more response accessors kept as notes (these are not complete statements):
$res->content
$res->is_success

# get_url_textok: a sample with \n, \r\n and \r line endings, compared against <ln> markers.
$str="<Test>\nThis is a Test\r\n;<!-- Abc\r \n-->";
is($bot->get_url_textok($str), "<ln> <Test><ln>This is a Test<ln>;<!-- Abc--><ln>", "function get_url_text_ok");

# Rebuild the bot through the recreate() helper before the parser checks.
$bot=&recreate($testdir);
# Sample page: a comment that wraps markup, a script element and a run of spaces.
$str=<<STR;
Before <!---
Test of mark <font>FONTS</font>
---> Inner
<script src="Test">display();</script>
Another   Space
STR
# Turn the line breaks into <ln> markers, the same substitution get_url_textok applies.
$str=~s/(\r\n|\n|\r)/<ln>/g;
# parse_empty: the expected text has the comment and the script gone and the space run collapsed.
is($bot->parse_empty($str), "Before Inner<ln><ln>Another Space<ln>", "function parse_empty");
# parse_leadingspace with REMOVE_LEADING_SPACES set: every line is expected to lose two leading spaces.
$bot->{REMOVE_LEADING_SPACES}=1;
$str="<ln>    a good<ln>  dog<ln>  must<ln>  be<ln>  good.<ln>";
is($bot->parse_leadingspace($str), "<ln>  a good<ln>dog<ln>must<ln>be<ln>good.<ln>", "function parse_leadingspace");
# parse_innerspace is exercised on the Chinese bot; it only acts when TEXTREMOVEINNERSPACE is set.
use WWW::BookBot::Chinese;
$bot=new WWW::BookBot::Chinese;
$bot->initialize;
$bot->{TEXTREMOVEINNERSPACE}=1;
# NOTE(review): the non-ASCII characters in the sample look like mis-decoded double-byte text -- confirm the file encoding.
$str="A b o u t  a  ÖÐgreat ¹ú ÊÇ Ò» ¸ö and  w o n d e r f u l  i d e a.<ln>";
is($bot->parse_innerspace($str), "About a great and wonderful idea.<ln>", "function parse_innerspace");


#print Dumper($bot);



#-----------------------------------------------
# get_url_ok: hand received contents to the text or the binary handler
#-----------------------------------------------
# @_			contents, passed on unchanged to the chosen handler
#---RETURN whatever the chosen handler returns
#-----------------------------------------------
sub get_url_ok {
	my $self = shift;

	# text/html and text/plain are treated as text ...
	if( $self->{Url_Content_Type} eq 'text/html'
			or $self->{Url_Content_Type} eq 'text/plain' ) {
		return $self->get_url_textok(@_);
	}

	# ... every other content type is treated as binary.
	return $self->get_url_binok(@_);
}

#-----------------------------------------------
# get_url_textok: prepare received text contents, marking line ends as <ln>
#-----------------------------------------------
# $str			contents (passed through de_code first)
#---RETURN contents wrapped in a leading and a trailing <ln>
#-----------------------------------------------
sub get_url_textok {
	my $self = shift;
	my $text = $self->de_code($_[0]);

	for ($text) {
		# \r\n, \r and \n all become the <ln> marker
		s/(\r\n|\r|\n)/<ln>/g;
		# a <...> span broken by one run of <ln> markers is joined again
		s/(<[^<>]*)(?: *<ln>)+([^<>]*>)/$1$2/g;
	}

	return join '', '<ln>', $text, '<ln>';
}

#-----------------------------------------------
# parse_img: find every <img> tag and report its image url
#-----------------------------------------------
# $base			base url used to resolve the src values
# $contents		html contents (read through $_[0], not modified)
#-----------------------------------------------
# Each src found is resolved with url_rel2abs and handed to parse_img_ok.
# Tags without a usable src are skipped.
#-----------------------------------------------
sub parse_img {
	my $self = shift;
	my $base = shift;
	while( $_[0]=~/<img([^<>]*>)/ig ) {
		my $html=$1;	# attribute text, including the closing '>'
		my $src;
		# The attribute name is matched case-insensitively, like the tag
		# name above, so <IMG SRC="..."> is found as well.
		if( $html=~/src *= *\"([^\">]*)\"/i ) {
			$src=$1;		# src="..."
		} elsif( $html=~/src *= *\'([^\'>]*)\'/i ) {
			$src=$1;		# src='...'
		} elsif( $html=~/src *= *([^ >]*)( |>)/i ) {
			$src=$1;		# src=... without quotes
		}
		# Without a src there is no image to report; resolving an empty
		# value would only hand the base url to parse_img_ok.
		next if not defined $src or $src eq '';
		$self->parse_img_ok($self->url_rel2abs($src, $base));
	}
}

#-----------------------------------------------
# parse_img_ok: called by parse_img for every image url it finds
#-----------------------------------------------
# $url			image url, as returned by url_rel2abs in parse_img
#-----------------------------------------------
sub parse_img_ok {
	my $self = shift;
	my ($url) = @_;
	# not finished: the url is unpacked but nothing is done with it yet
}


application/octet-stream
text/plain


# Wrap the pattern held in $p1 into Perl source: an anchored, /x-mode match of $str.
$p1="\$str=~\/^(?:$p1\)\$/x";

# Run the generated match; $str is replaced by $found only when the eval returns true.
# NOTE(review): string eval executes whatever $p1 contains -- confirm the pattern never comes from untrusted input.
$str=$found if eval($p1);

	# Run the text through parse_removespace (defined elsewhere) first.
	$str=$self->parse_removespace($str);
	# Look for a line that starts with spaces followed by two lines that
	# start without a space.
	# NOTE(review): presumably this detects hard-wrapped paragraphs whose
	# first line is indented -- confirm.
	if( $str=~/\n +[^ \n][^\n]*\n[^ \n][^\n]*\n[^ \n][^\n]*\n/sg ) {
		# "\n " -> <br>, "\n" -> no use
		# 1) newline plus leading spaces becomes a temporary "<br> " marker
		$str=~s/\n +(?=[^ \n])/<br> /sg;
		# 2) every remaining newline is dropped
		$str=~s/\n//sg;
		# 3) the markers become newlines again (a "<br>" already present
		#    in the text is turned into a newline here as well)
		$str=~s/<br>/\n/g;
	}


#-----------------------------------------------
# parse_innerspace: remove inner space
#-----------------------------------------------
# $content		contents, with line breaks written as <ln>
#---RETURN contents
#-----------------------------------------------
# Does nothing unless $self->{TEXTREMOVEINNERSPACE} is set.
# $self->{Pattern_Mark} is a regex fragment for the marks that may follow
# a character; an undefined value is treated as an empty fragment.
#-----------------------------------------------
sub parse_innerspace {
	my $self = shift;
	return $_[0] if not $self->{TEXTREMOVEINNERSPACE};

	my $mark = defined $self->{Pattern_Mark} ? $self->{Pattern_Mark} : '';

	# Only act on text holding at least seven consecutive
	# "one character (plus marks), one space" units.
	return $_[0] if not $_[0]=~/(?:[^ <>](?:$mark)* ){7}/;

	my $str="";
	# The capture in the split pattern keeps the separators (<ln> markers
	# and runs of two or more spaces) in the list of pieces.
	foreach my $piece (split /(<ln>| {2,})/, $_[0]) {
		# a run of spaces separates words: keep exactly one space
		if( $piece=~/^  / ) {
			$str.=" ";
			next;
		}

		# a piece made only of single characters (plus marks) separated
		# by single spaces is a spaced-out word: close it up
		$piece=~s/ //g if $piece=~/^[^ ](?:$mark)*(?: [^ ](?:$mark)*)+$/;
		$str.=$piece;
	}

	return $str;
}

WWW::BookBot is a bot that fetches web e-texts organized as catalogs, books and chapters. It can fetch multiple HTML pages and reformat them into a single text file for reading.
	Global
`s`	Focus search bar
`?`	Bring up this help dialog
	GitHub
`g` `p`	Go to pull requests
`g` `i`	Go to GitHub issues (only if GitHub is the preferred repository)
	POD
`g` `a`	Go to author
`g` `c`	Go to changes
`g` `i`	Go to issues
`g` `d`	Go to dist
`g` `r`	Go to repository/SCM
`g` `s`	Go to source
`g` `b`	Go to file browse
	Search terms
module: (e.g. module:Plugin)
distribution: (e.g. distribution:Dancer auth)
author: (e.g. author:SONGMU Redis)
version: (e.g. version:1.00)