# The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
package WWW::BookBot::Chinese;

use 5.008;
use strict;
use warnings;
no warnings qw(uninitialized);
use base qw(WWW::BookBot);
use vars qw($VERSION);
$VERSION = '0.12';

#-------------------------------------------------------------
# Default settings
#	$class->default_settings						=> \%settings
#-------------------------------------------------------------
# Start from the parent class's settings hash and override the three
# language-related entries before handing the same hash back.
sub default_settings {
	my ($class) = @_;
	my $settings = $class->SUPER::default_settings;
	# One slice assignment instead of three separate stores.
	@{$settings}{qw(get_language language_decode language_encode)}
		= ('zh-cn', 'gbk', 'gbk');
	return $settings;
}

#-------------------------------------------------------------
# Redefined functions
#	$bot->decode_entity($content_dein_deout)			=> N/A
#	$bot->trandict_init								=> $bot->{translate_dict}
#	$bot->msg_init									=> $bot->{messages}
#-------------------------------------------------------------
# Decode HTML character references in the caller's string, in place, and
# normalize the bullet character to a middle dot.
#	$_[0]	the bot (unused)
#	$_[1]	the text; it is modified through the @_ alias, so the caller's
#			variable changes. The return value is not meaningful.
#
# Recognized forms (the trailing ';' is optional in all of them):
#	&#NNNNN		decimal code point, 1-5 digits
#	&#xHHHHH	hexadecimal code point, 1-5 digits, 'x' or 'X'
#	&name		1-9 alphanumerics, looked up in %WWW::BookBot::entity2char;
#			unknown (or false-valued) names are left exactly as written
# Chinese novels sometimes add \x{FF1B} (fullwidth semicolon) after a numeric
# reference, so one such character is swallowed together with the reference.
sub decode_entity {
	# All three forms are decoded in ONE left-to-right pass. Running them as
	# separate passes decoded text twice: "&#38;lt;" first became "&lt;" and
	# was then turned into "<" by the named-entity pass. With a single pass
	# the output of one replacement is never rescanned.
	# Captures: 1 = decimal digits, 2 = hex digits, 3 = whole named
	# reference, 4 = entity name.
	$_[1] =~ s{
		  &\#     (\d{1,5})          ;? \x{FF1B}?
		| &\#[xX] ([0-9a-fA-F]{1,5}) ;? \x{FF1B}?
		| ( & ([0-9a-zA-Z]{1,9}) ;? )
	}{
		  defined $1 ? chr($1)
		: defined $2 ? chr(hex($2))
		:              ($WWW::BookBot::entity2char{$4} or $3)
	}gesx;
	# normalize middle dot
	$_[1] =~ s/\x{2022}/\x{00B7}/sg;
}
# Store the translation dictionary in $bot->{translate_dict}.
# The keys ('log', 'result', 'DB', 'debug') are looked up by the Fail*
# message templates in msg_init via
# $self->{translate_dict}->{$pargs->{filetype}}.
# NOTE(review): the values look like raw GBK bytes (default_settings sets
# language_encode to 'gbk'); keep the file's byte encoding intact -- confirm.
sub trandict_init {
	shift->{translate_dict} = {
		'log'		=> "ÈÕÖ¾",
		'result'	=> "½á¹û",
		'DB'		=> "Êý¾Ý",
		'debug'		=> "µ÷ÊÔ",
	}
}
# Store the table of message templates in $bot->{messages}.
# Every template is single-quoted (or a <<'DATA' heredoc), so the
# $pargs->{...} and $self->{...} placeholders are kept as literal text here;
# presumably they are expanded later by the base class -- confirm.
# Only the "\n" / "\t" pieces in double quotes are interpreted at this point.
# NOTE(review): the non-ASCII text looks like raw GBK bytes; keep the file's
# byte encoding intact when editing -- confirm.
sub msg_init {
	# Shared trailer appended to several templates: newline, then the
	# levelspace and url placeholders, then another newline.
	my $skip_info="\n".'$pargs->{levelspace}  url=$pargs->{url}'."\n";
	shift->{messages} = {
		TestMsg			=> '²âÊÔ: $pargs->{TestInfo} $pargs->{TestNum}',
		BookStart		=> '$pargs->{levelspace} [$pargs->{bpos_limit}/$pargs->{book_num}] $pargs->{title_limit} ',
		BookBinaryOK	=> '$pargs->{data_len_KB} $pargs->{write_file}'."\n",
		BookChapterErr	=> ' - ÎÞ·¨·ÖÎö'.$skip_info,
		BookChapterMany	=> '[$pargs->{chapter_num_limit}ÕÂ]',
		BookChapterOne	=> '[µ¥Õ½Ú]',
		BookChapterOK	=> '$pargs->{data_len_KB}'."\n",
		BookTOCFinish	=> '$pargs->{TOC_len_KB}'."\n",
		CatalogInfo		=> 'È¡ÊéÄ¿: ',
		CatalogResultErr=> ' 0Ì×Êé'."\n",
		CatalogResultOK	=> ' $pargs->{book_num}Ì×Êé'."\n",
		CatalogURL		=> '$pargs->{url}',
		CatalogURLEmpty	=> '[ʧ°Ü] Ë÷ÒýµÄURLΪ¿Õ'."\n",
		# DB* templates produce lines of a Perl script; in the *OK variants
		# the generated call starts with '#'.
		DBBookErr		=> "\t".' \$bot->go_book({$pargs->{allargs}});'."\t#´íÎó\n",
		DBBookOK		=> "\t".'#\$bot->go_book({$pargs->{allargs}});'."\n",
		DBCatalogErr	=> ' \$bot->go_catalog({$pargs->{allargs}});'."\t#´íÎó\n",
		DBCatalogOK		=> '#\$bot->go_catalog({$pargs->{allargs}});'."\n",
		DBHead			=> <<'DATA',
#!$pargs->{perlcmd}
##======================================
## ×Ô¶¯Éú³ÉµÄÊý¾ÝÎļþ£¬ÓÃÓÚ$pargs->{classname}
##    Éú³Éʱ¼ä: $pargs->{createtime}
##======================================

use $pargs->{classname};
my \$bot = new $pargs->{classname};

DATA
		# Fail* templates look up a file-type name in $self->{translate_dict}.
		FailClearDB		=> 'ÎÞ·¨Çå³ýÊý¾ÝÎļþ$pargs->{filename}: $pargs->{errmsg}',
		FailClose	 	=> 'ÎÞ·¨¹Ø±Õ$self->{translate_dict}->{$pargs->{filetype}}Îļþ$pargs->{filename}: $pargs->{errmsg}',
		FailMkDir		=> '½¨Ä¿Â¼$pargs->{dir}ʧ°Ü: $pargs->{errmsg}',
		FailOpen	 	=> 'ÎÞ·¨´ò¿ª$self->{translate_dict}->{$pargs->{filetype}}Îļþ$pargs->{filename}: $pargs->{errmsg}',
		FailWrite	 	=> 'ÎÞ·¨Ð´Èë$self->{translate_dict}->{$pargs->{filetype}}Îļþ$pargs->{filename}: $pargs->{errmsg}',
		GetFail404		=> <<'DATA',
[$pargs->{code},ʧ°Ü] ÕÒ²»µ½Îļþ
        $pargs->{url_real}
DATA
		GetFail404Detail=> <<'DATA',
[$pargs->{code},ʧ°Ü] ÕÒ²»µ½Îļþ
>>>>ÇëÇó
$pargs->{req_content}<<<<ÏìÓ¦
$pargs->{status_line}

DATA
		GetFailRetries	=> <<'DATA',
[$pargs->{code},ʧ°Ü] ÖØÊÔÌ«¶à£¬·ÅÆú
        $pargs->{url_real}
DATA
		GetFailRetriesDetail	=> <<'DATA',
[$pargs->{code},ʧ°Ü] ÖØÊÔÌ«¶à£¬·ÅÆú
>>>>ÇëÇó
$pargs->{req_content}<<<<ÏìÓ¦
$pargs->{status_line}
$pargs->{res_content}

DATA
		GetURLSuccess	=> '$pargs->{len_KB} ',
		GetURLRetry		=> '[$pargs->{code},ÖØÊÔ] ',
		GetWait			=> 'µÈ´ý..',
		# Skip* templates; most end with the shared $skip_info trailer.
		SkipMaxLevel	=> '[Ìø¹ý]²ãÊý>$self->{book_max_levels}'.$skip_info,
		SkipMedia		=> '[Ìø¹ý]ýÌåÎļþ'.$skip_info,
		SkipTitleEmpty	=> '[Ìø¹ý]±êÌâΪ¿Õ'.$skip_info,
		SkipUrlEmpty	=> '[Ìø¹ý]µØַΪ¿Õ'."\n",
		SkipVisited		=> '[Ìø¹ý]ÒÑ·ÃÎʹý'."\n",
		SkipZip			=> '[Ìø¹ý]ѹËõÎļþ'.$skip_info,
	};
}

#-------------------------------------------------------------
# patterns
#-------------------------------------------------------------
# Return the pattern text for the "space2" pattern: a single bracketed
# character class followed by the heredoc's trailing newline.
# NOTE(review): the class members look like GBK-encoded whitespace
# characters; how the text is compiled is up to the base class -- confirm.
sub getpattern_space2_data {
	<<'DATA';
[¡¡£ ¡@]
DATA
}
# Return the literal text for the "line_head" pattern (no trailing newline).
# NOTE(review): presumably two GBK fullwidth spaces, i.e. the paragraph
# indent -- confirm.
sub getpattern_line_head_data {
	'¡¡¡¡';
}
# Return the parent class's parentheses data with the list below appended.
# Each added line holds two entries separated by a space.
# NOTE(review): presumably "opening closing" bracket/quote pairs in GBK;
# the exact line format is defined by the base class -- confirm.
sub getpattern_parentheses_data {
	shift->SUPER::getpattern_parentheses_data().<<'DATA';
¡¨ ¡¨
¡® ¡¯
¡° ¡±
¡² ¡³
¡´ ¡µ
¡¶ ¡·
¡¸ ¡¹
¡º ¡»
¡¼ ¡½
¡¾ ¡¿
¡ä ¡ä
¡å ¡å
£¢ £¢
£§ £§
£¨ £©
£¼ £¾
£Û £Ý
£à £à
£à £§
£û £ý
¦à ¦á
¦â ¦ã
¦ä ¦å
¦æ ¦ç
¦è ¦é
¦ê ¦ë
¦î ¦ï
¦ð ¦ñ
¨A ¨@
¨F ¨F
¨” ¨•
©v ©w
©x ©y
©z ©{
©‚ ©ƒ
DATA
}
# Return the pattern text for the "mark_dash" pattern: one bracketed
# character class (ASCII symbols plus encoded characters) and the heredoc's
# trailing newline.
# NOTE(review): the non-ASCII members look like GBK dash/symbol/box-drawing
# characters; the line may contain bytes that do not display -- edit only
# with a byte-preserving editor and confirm.
sub getpattern_mark_dash_data {
	<<'DATA';
[#-&\*\+\-=@_~¡¥¡ª¡«¡¬¡­¡Á¡Â¡Ë¡Ñ¡Ô¡Ö¡×¡Þ¡ç¡è¡é¡ë¡ì£££¥£¦£ª£«£­£½£À£ß£ü¨C¨D¨E¨O©W©\©`©¤-¡á©–¡þ¡ù¦ò-¦õ©h-©n©~©€©©„©†©‡©ˆ]
DATA
}
# Return the pattern text for the "mark_wordsplit" pattern: one bracketed
# character class starting with ASCII punctuation (. , ? ! : ;) followed by
# encoded characters, plus the heredoc's trailing newline.
# NOTE(review): the non-ASCII members are presumably the GBK fullwidth
# punctuation equivalents -- confirm.
sub getpattern_mark_wordsplit_data {
	<<'DATA';
[\.\,\?\!\:\;¡Ã¡¢¡£¡¤£¡£¬£®£º£»£¿©U©o©p©q©r©s©t©u]
DATA
}
# Return the pattern text for the "word_finish" pattern: an optional
# two-part prefix group followed by a character class, plus the heredoc's
# trailing newline.
# NOTE(review): presumably matches GBK "the end"-style markers -- confirm.
sub getpattern_word_finish_data {
	<<'DATA';
(?:È«[ÎÄÊé]|)[ÍêÖÕ]
DATA
}
# Return the data for the "remove_line_by_end" pattern: a multi-line text
# with one regex fragment per line, each line ending in a newline.
# The fragments include encoded words as well as ASCII site names such as
# cnread.net and thebook.yeah.net, with a character class in place of each
# dot that also accepts encoded dot-like characters.
# NOTE(review): the meaning of the leading "(case)" line and the way the
# lines are combined are defined by the base class -- confirm.
sub getpattern_remove_line_by_end_data {
	<<'DATA';
(case)
[±¨ÍøÉçѶ]
[Á¬ÖØÅÅÕû³öÌáÍÆɨУ½Ï±àÊéÊÀÊÓÎÄ¿ÆÔÚÌÖС¹¤×ª][ѧ»ÃÂÛ×÷]?(?:[ÔØÌùÅÅ°æÀíÆ·¹©³öÈëУ½ÏÃèÕý¶ÔÕßÎÝ¿â³Ç·½çÔ·ÏßÇø×éÊÒ]|º£Ñó|ÍûÔ¶¾µ|ÌÒ»¨Ô´|-K12)(?:Íê³É|)
Çë(?:ÉêÇëÊÚȨ|±£Áôվ̨ÐÅÏ¢)[¡££®©q\.£¡©u]?
ÖÆ×÷
[Oo£Ï£ï][Cc£Ã£ã][Rr£Ò£ò]
²É±àÖÐÐÄ
Òà·²¹«ÒæͼÊé¹Ý
ÁúµÄÌì¿Õ
ʧÂäµÄÐdz½
ÊéÏãÃŵÚ
¾ÉÓêÂ¥
Ò»½£Ð¡ÌìÏÂ
Öñ¶ºÉ·ç
Ñï½£Ðù¾ÓÊ¿
»ÃÏëʱ´ú
ðÏÕÕßÌìÌÃ
ÐÅÏ¢ÖÐÐÄ
cnread[\.¡££®¡¤©q]net
ezla[\.¡££®¡¤©q]com?[\.¡££®¡¤©q]tw
thebook[\.¡££®¡¤©q]yeah[\.¡££®¡¤©q]net
y(?:esho[\.¡££®¡¤©q]com/wenxue|uzispy[\.¡££®¡¤©q]yeah[\.¡££®¡¤©q]net)
www[\.¡££®¡¤©q](?:v-war|oldrain)[\.¡££®¡¤©q](?:net|com)
DATA
}
# Return the data for the "remove_line_by_end_special" pattern: a single
# line of encoded characters plus the heredoc's trailing newline.
# The same characters appear as a bracketed class on the second line of
# getpattern_remove_line_by_end_data.
# NOTE(review): how this "special" list is used is defined by the base
# class -- confirm.
sub getpattern_remove_line_by_end_special_data {
	<<'DATA';
±¨ÍøÉçѶ
DATA
}

1;
__END__

=head1 NAME

WWW::BookBot::Chinese - Virtual class of bots to process Chinese e-texts.

=head1 SYNOPSIS

  use WWW::BookBot::Chinese::Novel::DragonSky;
  my $bot=WWW::BookBot::Chinese::Novel::DragonSky->new({work_dir=>'/output'});
  $bot->go_catalog({});

  use WWW::BookBot::Chinese::Novel::ShuKu;
  my $bot=WWW::BookBot::Chinese::Novel::ShuKu->new({});
  $bot->go_catalog({desc=>'NewNovel', cat1=>0, cat2=>1, pageno=>0});

=head1 ABSTRACT

Virtual class of bots to process Chinese e-texts.

=head1 DESCRIPTION

Virtual class of bots to process Chinese e-texts.

Further documentation is to be added.

=head2 EXPORT

None by default.

=head1 BUGS, REQUESTS, COMMENTS

Please report any requests, suggestions or bugs via
http://rt.cpan.org/NoAuth/ReportBug.html?Queue=WWW-BookBot

=head1 AUTHOR

Qing-Jie Zhou E<lt>qjzhou@hotmail.comE<gt>

=head1 SEE ALSO

L<WWW::BookBot>

=cut