@@ -1,48 +1,3 @@
-1.09 Tue 30 March 2004, 13:00pm BST
-
-+ Decided to release a 'production' version. It works, it works
- well, it just lacks some documentation. This way, people will
- find it. That's good.
-
-+ Fixed a bug that'd allow trailing data after the last }
-
-1.8_7 Wed 17 March 2004, 15:00pm GBT
-
-+ Actually fixed ansi/charmap problem actually for real :-/
-
-1.8_6 Sun 14 March 2004, 22:00pm GBT
-
-+ 'ansi' and 'char_map' files put into modules, to make
- packing the module into an executable easier, and to
- make some really nasty code redundant
-+ RTF::TEXT::Converter does The Right Thing with entities
- and blank lines, even on non \ansi files...
-+ Little files now get converted properly
-- \ulnone treated like \ul0 in RTF::Control
-
-1.8_5 Sat 24 January 2004, 10:45pm GBT
-
-+ Documentation is almost done
-+ RTF::Parser is fully tested
-+ Final beta release?
-
-1.8_3 Jan 22 January 2004, 12:55pm GBT
-
-+ Added documentation for the HTML and TEXT converting
- modules
-
-1.8_2 Mon 12 January 2004, 07:10pm GBT
-
-+ Added RTF::Control tests and documentation
-+ Added more RTF::Parser docs
-+ Moved executables into their own directory
-
-1.8_1 Thu 28 August 2003, 10:00pm GBT
-
-+ Rewrote all of RTF::Parser
-+ Documented RTF::Parser's interface
-+ Added a nice big sample in the docs
-
1.7 mercredi, 28 juillet 1999, 12:10:54 MET DST
- Corrected problem with the loading of the char_map file
@@ -1,35 +1,19 @@
Changes
MANIFEST
-META.yml
+Makefile
Makefile.PL
README
-bin/rtf2html
-bin/rtf2text
-docs/Overview.pod How the distribution fits together
-docs/rtf2html.txt
examples/tests.pl
lib/RTF/Charsets.pm
lib/RTF/Config.pm
lib/RTF/Control.pm
-lib/RTF/HTML/Converter/ansi.pm
-lib/RTF/HTML/Converter/charmap.pm
lib/RTF/HTML/Converter.pm
+lib/RTF/HTML/ansi
+lib/RTF/HTML/char_map
lib/RTF/Parser.pm
-lib/RTF/TEXT/Converter/ansi.pm
-lib/RTF/TEXT/Converter/charmap.pm
lib/RTF/TEXT/Converter.pm
-t/01_parser_01_new.t
-t/01_parser_02_api.t
-t/01_parser_03_parse_string.t
-t/01_parser_04_parse_file.t
-t/01_parser_05_control_definition.t
-t/01_parser_06_skip_destinations.t
-t/02_control_01_new.t
-t/02_control_02_configure.t
-t/02_control_03_application_dir.t
-t/02_control_04_charmap_reader.t
-t/02_control_05_strip_dead_content.t
-t/03_html_01_handling_ulnone.t
-t/05_charmaps_01_loading.t
-t/05_charmaps_02_html_sample.t
-t/99_old_test.t
+lib/RTF/TEXT/ansi
+lib/RTF/TEXT/char_map
+rtf2html
+rtf2text
+t/t1.t
@@ -1,10 +0,0 @@
-#XXXXXXX This is a prototype!!! It will change in the future!!! XXXXX#
-name: RTF-Parser
-version: 1.08_6
-version_from: lib/RTF/Parser.pm
-installdirs: site
-requires:
- RTF::Tokenizer: 1.04
-
-distribution_type: module
-generated_by: ExtUtils::MakeMaker version 6.12
@@ -0,0 +1,694 @@
+# This Makefile is for the RTF::Parser extension to perl.
+#
+# It was generated automatically by MakeMaker version
+# 5.4301 (Revision: 1.222) from the contents of
+# Makefile.PL. Don't edit this file, edit Makefile.PL instead.
+#
+# ANY CHANGES MADE HERE WILL BE LOST!
+#
+# MakeMaker Parameters:
+
+# DISTNAME => q[RTF-Parser]
+# EXE_FILES => [q[rtf2html], q[rtf2text]]
+# NAME => q[RTF::Parser]
+# VERSION_FROM => q[lib/RTF/Parser.pm]
+# dist => { COMPRESS=>q[gzip], SUFFIX=>q[gz] }
+
+# --- MakeMaker post_initialize section:
+
+
+# --- MakeMaker const_config section:
+
+# These definitions are from config.sh (via /usr/local/lib/perl5/5.00502/sun4-solaris/Config.pm)
+
+# They may have been overridden via Makefile.PL or on the command line
+AR = ar
+CC = gcc
+CCCDLFLAGS = -fPIC
+CCDLFLAGS =
+DLEXT = so
+DLSRC = dl_dlopen.xs
+LD = gcc
+LDDLFLAGS = -G -L/usr/local/lib
+LDFLAGS = -L/usr/local/lib
+LIBC = /lib/libc.so
+LIB_EXT = .a
+OBJ_EXT = .o
+OSNAME = solaris
+OSVERS = 2.5.1
+RANLIB = :
+SO = so
+EXE_EXT =
+
+
+# --- MakeMaker constants section:
+AR_STATIC_ARGS = cr
+NAME = RTF::Parser
+DISTNAME = RTF-Parser
+NAME_SYM = RTF_Parser
+VERSION = 1.07
+VERSION_SYM = 1_07
+XS_VERSION = 1.07
+INST_BIN = blib/bin
+INST_EXE = blib/script
+INST_LIB = blib/lib
+INST_ARCHLIB = blib/arch
+INST_SCRIPT = blib/script
+PREFIX = /usr/local
+INSTALLDIRS = site
+INSTALLPRIVLIB = $(PREFIX)/lib/perl5/5.00502
+INSTALLARCHLIB = $(PREFIX)/lib/perl5/5.00502/sun4-solaris
+INSTALLSITELIB = $(PREFIX)/lib/perl5/site_perl/5.005
+INSTALLSITEARCH = $(PREFIX)/lib/perl5/site_perl/5.005/sun4-solaris
+INSTALLBIN = $(PREFIX)/bin
+INSTALLSCRIPT = $(PREFIX)/bin
+PERL_LIB = /usr/local/lib/perl5/5.00502
+PERL_ARCHLIB = /usr/local/lib/perl5/5.00502/sun4-solaris
+SITELIBEXP = /usr/local/lib/perl5/site_perl/5.005
+SITEARCHEXP = /usr/local/lib/perl5/site_perl/5.005/sun4-solaris
+LIBPERL_A = libperl.a
+FIRST_MAKEFILE = Makefile
+MAKE_APERL_FILE = Makefile.aperl
+PERLMAINCC = $(CC)
+PERL_INC = /usr/local/lib/perl5/5.00502/sun4-solaris/CORE
+PERL = /usr/local/bin/perl
+FULLPERL = /usr/local/bin/perl
+
+VERSION_MACRO = VERSION
+DEFINE_VERSION = -D$(VERSION_MACRO)=\"$(VERSION)\"
+XS_VERSION_MACRO = XS_VERSION
+XS_DEFINE_VERSION = -D$(XS_VERSION_MACRO)=\"$(XS_VERSION)\"
+
+MAKEMAKER = /usr/local/lib/perl5/5.00502/ExtUtils/MakeMaker.pm
+MM_VERSION = 5.4301
+
+# FULLEXT = Pathname for extension directory (eg Foo/Bar/Oracle).
+# BASEEXT = Basename part of FULLEXT. May be just equal FULLEXT. (eg Oracle)
+# ROOTEXT = Directory part of FULLEXT with leading slash (eg /DBD) !!! Deprecated from MM 5.32 !!!
+# PARENT_NAME = NAME without BASEEXT and no trailing :: (eg Foo::Bar)
+# DLBASE = Basename part of dynamic library. May be just equal BASEEXT.
+FULLEXT = RTF/Parser
+BASEEXT = Parser
+PARENT_NAME = RTF
+DLBASE = $(BASEEXT)
+VERSION_FROM = lib/RTF/Parser.pm
+OBJECT =
+LDFROM = $(OBJECT)
+LINKTYPE = dynamic
+
+# Handy lists of source code files:
+XS_FILES=
+C_FILES =
+O_FILES =
+H_FILES =
+MAN1PODS =
+MAN3PODS =
+INST_MAN1DIR = blib/man1
+INSTALLMAN1DIR = /usr/local/man/man1
+MAN1EXT = 1
+INST_MAN3DIR = blib/man3
+INSTALLMAN3DIR = /usr/local/lib/perl5/5.00502/man/man3
+MAN3EXT = 3
+PERM_RW = 644
+PERM_RWX = 755
+
+# work around a famous dec-osf make(1) feature(?):
+makemakerdflt: all
+
+.SUFFIXES: .xs .c .C .cpp .cxx .cc $(OBJ_EXT)
+
+# Nick wanted to get rid of .PRECIOUS. I don't remember why. I seem to recall, that
+# some make implementations will delete the Makefile when we rebuild it. Because
+# we call false(1) when we rebuild it. So make(1) is not completely wrong when it
+# does so. Our milage may vary.
+# .PRECIOUS: Makefile # seems to be not necessary anymore
+
+.PHONY: all config static dynamic test linkext manifest
+
+# Where is the Config information that we are using/depend on
+CONFIGDEP = $(PERL_ARCHLIB)/Config.pm $(PERL_INC)/config.h
+
+# Where to put things:
+INST_LIBDIR = $(INST_LIB)/RTF
+INST_ARCHLIBDIR = $(INST_ARCHLIB)/RTF
+
+INST_AUTODIR = $(INST_LIB)/auto/$(FULLEXT)
+INST_ARCHAUTODIR = $(INST_ARCHLIB)/auto/$(FULLEXT)
+
+INST_STATIC =
+INST_DYNAMIC =
+INST_BOOT =
+
+EXPORT_LIST =
+
+PERL_ARCHIVE =
+
+TO_INST_PM = lib/RTF/Charsets.pm \
+ lib/RTF/Config.pm \
+ lib/RTF/Control.pm \
+ lib/RTF/HTML/Converter.pm \
+ lib/RTF/HTML/ansi \
+ lib/RTF/HTML/char_map \
+ lib/RTF/Parser.pm \
+ lib/RTF/TEXT/Converter.pm \
+ lib/RTF/TEXT/ansi \
+ lib/RTF/TEXT/char_map
+
+PM_TO_BLIB = lib/RTF/TEXT/char_map \
+ $(INST_LIB)/RTF/TEXT/char_map \
+ lib/RTF/TEXT/ansi \
+ $(INST_LIB)/RTF/TEXT/ansi \
+ lib/RTF/TEXT/Converter.pm \
+ $(INST_LIB)/RTF/TEXT/Converter.pm \
+ lib/RTF/HTML/Converter.pm \
+ $(INST_LIB)/RTF/HTML/Converter.pm \
+ lib/RTF/HTML/char_map \
+ $(INST_LIB)/RTF/HTML/char_map \
+ lib/RTF/HTML/ansi \
+ $(INST_LIB)/RTF/HTML/ansi \
+ lib/RTF/Charsets.pm \
+ $(INST_LIB)/RTF/Charsets.pm \
+ lib/RTF/Parser.pm \
+ $(INST_LIB)/RTF/Parser.pm \
+ lib/RTF/Config.pm \
+ $(INST_LIB)/RTF/Config.pm \
+ lib/RTF/Control.pm \
+ $(INST_LIB)/RTF/Control.pm
+
+
+# --- MakeMaker tool_autosplit section:
+
+# Usage: $(AUTOSPLITFILE) FileToSplit AutoDirToSplitInto
+AUTOSPLITFILE = $(PERL) "-I$(PERL_ARCHLIB)" "-I$(PERL_LIB)" -e 'use AutoSplit;autosplit($$ARGV[0], $$ARGV[1], 0, 1, 1) ;'
+
+
+# --- MakeMaker tool_xsubpp section:
+
+
+# --- MakeMaker tools_other section:
+
+SHELL = /bin/sh
+CHMOD = chmod
+CP = cp
+LD = gcc
+MV = mv
+NOOP = $(SHELL) -c true
+RM_F = rm -f
+RM_RF = rm -rf
+TEST_F = test -f
+TOUCH = touch
+UMASK_NULL = umask 0
+DEV_NULL = > /dev/null 2>&1
+
+# The following is a portable way to say mkdir -p
+# To see which directories are created, change the if 0 to if 1
+MKPATH = $(PERL) -I$(PERL_ARCHLIB) -I$(PERL_LIB) -MExtUtils::Command -e mkpath
+
+# This helps us to minimize the effect of the .exists files A yet
+# better solution would be to have a stable file in the perl
+# distribution with a timestamp of zero. But this solution doesn't
+# need any changes to the core distribution and works with older perls
+EQUALIZE_TIMESTAMP = $(PERL) -I$(PERL_ARCHLIB) -I$(PERL_LIB) -MExtUtils::Command -e eqtime
+
+# Here we warn users that an old packlist file was found somewhere,
+# and that they should call some uninstall routine
+WARN_IF_OLD_PACKLIST = $(PERL) -we 'exit unless -f $$ARGV[0];' \
+-e 'print "WARNING: I have found an old package in\n";' \
+-e 'print "\t$$ARGV[0].\n";' \
+-e 'print "Please make sure the two installations are not conflicting\n";'
+
+UNINST=0
+VERBINST=1
+
+MOD_INSTALL = $(PERL) -I$(INST_LIB) -I$(PERL_LIB) -MExtUtils::Install \
+-e "install({@ARGV},'$(VERBINST)',0,'$(UNINST)');"
+
+DOC_INSTALL = $(PERL) -e '$$\="\n\n";' \
+-e 'print "=head2 ", scalar(localtime), ": C<", shift, ">", " L<", shift, ">";' \
+-e 'print "=over 4";' \
+-e 'while (defined($$key = shift) and defined($$val = shift)){print "=item *";print "C<$$key: $$val>";}' \
+-e 'print "=back";'
+
+UNINSTALL = $(PERL) -MExtUtils::Install \
+-e 'uninstall($$ARGV[0],1,1); print "\nUninstall is deprecated. Please check the";' \
+-e 'print " packlist above carefully.\n There may be errors. Remove the";' \
+-e 'print " appropriate files manually.\n Sorry for the inconveniences.\n"'
+
+
+# --- MakeMaker dist section:
+
+DISTVNAME = $(DISTNAME)-$(VERSION)
+TAR = tar
+TARFLAGS = cvf
+ZIP = zip
+ZIPFLAGS = -r
+COMPRESS = gzip
+SUFFIX = gz
+SHAR = shar
+PREOP = @$(NOOP)
+POSTOP = @$(NOOP)
+TO_UNIX = @$(NOOP)
+CI = ci -u
+RCS_LABEL = rcs -Nv$(VERSION_SYM): -q
+DIST_CP = best
+DIST_DEFAULT = tardist
+
+
+# --- MakeMaker macro section:
+
+
+# --- MakeMaker depend section:
+
+
+# --- MakeMaker cflags section:
+
+
+# --- MakeMaker const_loadlibs section:
+
+
+# --- MakeMaker const_cccmd section:
+
+
+# --- MakeMaker post_constants section:
+
+
+# --- MakeMaker pasthru section:
+
+PASTHRU = LIB="$(LIB)"\
+ LIBPERL_A="$(LIBPERL_A)"\
+ LINKTYPE="$(LINKTYPE)"\
+ PREFIX="$(PREFIX)"\
+ OPTIMIZE="$(OPTIMIZE)"
+
+
+# --- MakeMaker c_o section:
+
+
+# --- MakeMaker xs_c section:
+
+
+# --- MakeMaker xs_o section:
+
+
+# --- MakeMaker top_targets section:
+
+#all :: config $(INST_PM) subdirs linkext manifypods
+
+all :: pure_all manifypods
+ @$(NOOP)
+
+pure_all :: config pm_to_blib subdirs linkext
+ @$(NOOP)
+
+subdirs :: $(MYEXTLIB)
+ @$(NOOP)
+
+config :: Makefile $(INST_LIBDIR)/.exists
+ @$(NOOP)
+
+config :: $(INST_ARCHAUTODIR)/.exists
+ @$(NOOP)
+
+config :: $(INST_AUTODIR)/.exists
+ @$(NOOP)
+
+config :: Version_check
+ @$(NOOP)
+
+
+$(INST_AUTODIR)/.exists :: /usr/local/lib/perl5/5.00502/sun4-solaris/CORE/perl.h
+ @$(MKPATH) $(INST_AUTODIR)
+ @$(EQUALIZE_TIMESTAMP) /usr/local/lib/perl5/5.00502/sun4-solaris/CORE/perl.h $(INST_AUTODIR)/.exists
+
+ -@$(CHMOD) $(PERM_RWX) $(INST_AUTODIR)
+
+$(INST_LIBDIR)/.exists :: /usr/local/lib/perl5/5.00502/sun4-solaris/CORE/perl.h
+ @$(MKPATH) $(INST_LIBDIR)
+ @$(EQUALIZE_TIMESTAMP) /usr/local/lib/perl5/5.00502/sun4-solaris/CORE/perl.h $(INST_LIBDIR)/.exists
+
+ -@$(CHMOD) $(PERM_RWX) $(INST_LIBDIR)
+
+$(INST_ARCHAUTODIR)/.exists :: /usr/local/lib/perl5/5.00502/sun4-solaris/CORE/perl.h
+ @$(MKPATH) $(INST_ARCHAUTODIR)
+ @$(EQUALIZE_TIMESTAMP) /usr/local/lib/perl5/5.00502/sun4-solaris/CORE/perl.h $(INST_ARCHAUTODIR)/.exists
+
+ -@$(CHMOD) $(PERM_RWX) $(INST_ARCHAUTODIR)
+
+help:
+ perldoc ExtUtils::MakeMaker
+
+Version_check:
+ @$(PERL) -I$(PERL_ARCHLIB) -I$(PERL_LIB) \
+ -MExtUtils::MakeMaker=Version_check \
+ -e "Version_check('$(MM_VERSION)')"
+
+
+# --- MakeMaker linkext section:
+
+linkext :: $(LINKTYPE)
+ @$(NOOP)
+
+
+# --- MakeMaker dlsyms section:
+
+
+# --- MakeMaker dynamic section:
+
+## $(INST_PM) has been moved to the all: target.
+## It remains here for awhile to allow for old usage: "make dynamic"
+#dynamic :: Makefile $(INST_DYNAMIC) $(INST_BOOT) $(INST_PM)
+dynamic :: Makefile $(INST_DYNAMIC) $(INST_BOOT)
+ @$(NOOP)
+
+
+# --- MakeMaker dynamic_bs section:
+
+BOOTSTRAP =
+
+
+# --- MakeMaker dynamic_lib section:
+
+
+# --- MakeMaker static section:
+
+## $(INST_PM) has been moved to the all: target.
+## It remains here for awhile to allow for old usage: "make static"
+#static :: Makefile $(INST_STATIC) $(INST_PM)
+static :: Makefile $(INST_STATIC)
+ @$(NOOP)
+
+
+# --- MakeMaker static_lib section:
+
+
+# --- MakeMaker manifypods section:
+
+manifypods : pure_all
+ @$(NOOP)
+
+
+# --- MakeMaker processPL section:
+
+
+# --- MakeMaker installbin section:
+
+$(INST_SCRIPT)/.exists :: /usr/local/lib/perl5/5.00502/sun4-solaris/CORE/perl.h
+ @$(MKPATH) $(INST_SCRIPT)
+ @$(EQUALIZE_TIMESTAMP) /usr/local/lib/perl5/5.00502/sun4-solaris/CORE/perl.h $(INST_SCRIPT)/.exists
+
+ -@$(CHMOD) $(PERM_RWX) $(INST_SCRIPT)
+
+EXE_FILES = rtf2html rtf2text
+
+FIXIN = $(PERL) -I$(PERL_ARCHLIB) -I$(PERL_LIB) -MExtUtils::MakeMaker \
+ -e "MY->fixin(shift)"
+
+all :: $(INST_SCRIPT)/rtf2text $(INST_SCRIPT)/rtf2html
+ @$(NOOP)
+
+realclean ::
+ rm -f $(INST_SCRIPT)/rtf2text $(INST_SCRIPT)/rtf2html
+
+$(INST_SCRIPT)/rtf2text: rtf2text Makefile $(INST_SCRIPT)/.exists
+ @rm -f $(INST_SCRIPT)/rtf2text
+ cp rtf2text $(INST_SCRIPT)/rtf2text
+ $(FIXIN) $(INST_SCRIPT)/rtf2text
+ -@$(CHMOD) $(PERM_RWX) $(INST_SCRIPT)/rtf2text
+
+$(INST_SCRIPT)/rtf2html: rtf2html Makefile $(INST_SCRIPT)/.exists
+ @rm -f $(INST_SCRIPT)/rtf2html
+ cp rtf2html $(INST_SCRIPT)/rtf2html
+ $(FIXIN) $(INST_SCRIPT)/rtf2html
+ -@$(CHMOD) $(PERM_RWX) $(INST_SCRIPT)/rtf2html
+
+
+# --- MakeMaker subdirs section:
+
+# none
+
+# --- MakeMaker clean section:
+
+# Delete temporary files but do not touch installed files. We don't delete
+# the Makefile here so a later make realclean still has a makefile to use.
+
+clean ::
+ -rm -rf ./blib $(MAKE_APERL_FILE) $(INST_ARCHAUTODIR)/extralibs.all perlmain.c mon.out core so_locations pm_to_blib *~ */*~ */*/*~ *$(OBJ_EXT) *$(LIB_EXT) perl.exe $(BOOTSTRAP) $(BASEEXT).bso $(BASEEXT).def $(BASEEXT).exp
+ -mv Makefile Makefile.old $(DEV_NULL)
+
+
+# --- MakeMaker realclean section:
+
+# Delete temporary files (via clean) and also delete installed files
+realclean purge :: clean
+ rm -rf $(INST_AUTODIR) $(INST_ARCHAUTODIR)
+ rm -f $(INST_LIB)/RTF/TEXT/char_map $(INST_LIB)/RTF/TEXT/ansi $(INST_LIB)/RTF/TEXT/Converter.pm $(INST_LIB)/RTF/HTML/Converter.pm $(INST_LIB)/RTF/HTML/char_map $(INST_LIB)/RTF/HTML/ansi $(INST_LIB)/RTF/Charsets.pm $(INST_LIB)/RTF/Parser.pm $(INST_LIB)/RTF/Config.pm $(INST_LIB)/RTF/Control.pm
+ rm -rf Makefile Makefile.old
+
+
+# --- MakeMaker dist_basics section:
+
+distclean :: realclean distcheck
+
+distcheck :
+ $(PERL) -I$(PERL_ARCHLIB) -I$(PERL_LIB) -MExtUtils::Manifest=fullcheck \
+ -e fullcheck
+
+skipcheck :
+ $(PERL) -I$(PERL_ARCHLIB) -I$(PERL_LIB) -MExtUtils::Manifest=skipcheck \
+ -e skipcheck
+
+manifest :
+ $(PERL) -I$(PERL_ARCHLIB) -I$(PERL_LIB) -MExtUtils::Manifest=mkmanifest \
+ -e mkmanifest
+
+
+# --- MakeMaker dist_core section:
+
+dist : $(DIST_DEFAULT)
+ @$(PERL) -le 'print "Warning: Makefile possibly out of date with $$vf" if ' \
+ -e '-e ($$vf="$(VERSION_FROM)") and -M $$vf < -M "Makefile";'
+
+tardist : $(DISTVNAME).tar$(SUFFIX)
+
+zipdist : $(DISTVNAME).zip
+
+$(DISTVNAME).tar$(SUFFIX) : distdir
+ $(PREOP)
+ $(TO_UNIX)
+ $(TAR) $(TARFLAGS) $(DISTVNAME).tar $(DISTVNAME)
+ $(RM_RF) $(DISTVNAME)
+ $(COMPRESS) $(DISTVNAME).tar
+ $(POSTOP)
+
+$(DISTVNAME).zip : distdir
+ $(PREOP)
+ $(ZIP) $(ZIPFLAGS) $(DISTVNAME).zip $(DISTVNAME)
+ $(RM_RF) $(DISTVNAME)
+ $(POSTOP)
+
+uutardist : $(DISTVNAME).tar$(SUFFIX)
+ uuencode $(DISTVNAME).tar$(SUFFIX) \
+ $(DISTVNAME).tar$(SUFFIX) > \
+ $(DISTVNAME).tar$(SUFFIX)_uu
+
+shdist : distdir
+ $(PREOP)
+ $(SHAR) $(DISTVNAME) > $(DISTVNAME).shar
+ $(RM_RF) $(DISTVNAME)
+ $(POSTOP)
+
+
+# --- MakeMaker dist_dir section:
+
+distdir :
+ $(RM_RF) $(DISTVNAME)
+ $(PERL) -I$(PERL_ARCHLIB) -I$(PERL_LIB) -MExtUtils::Manifest=manicopy,maniread \
+ -e "manicopy(maniread(),'$(DISTVNAME)', '$(DIST_CP)');"
+
+
+# --- MakeMaker dist_test section:
+
+disttest : distdir
+ cd $(DISTVNAME) && $(PERL) -I$(PERL_ARCHLIB) -I$(PERL_LIB) Makefile.PL
+ cd $(DISTVNAME) && $(MAKE)
+ cd $(DISTVNAME) && $(MAKE) test
+
+
+# --- MakeMaker dist_ci section:
+
+ci :
+ $(PERL) -I$(PERL_ARCHLIB) -I$(PERL_LIB) -MExtUtils::Manifest=maniread \
+ -e "@all = keys %{ maniread() };" \
+ -e 'print("Executing $(CI) @all\n"); system("$(CI) @all");' \
+ -e 'print("Executing $(RCS_LABEL) ...\n"); system("$(RCS_LABEL) @all");'
+
+
+# --- MakeMaker install section:
+
+install :: all pure_install doc_install
+
+install_perl :: all pure_perl_install doc_perl_install
+
+install_site :: all pure_site_install doc_site_install
+
+install_ :: install_site
+ @echo INSTALLDIRS not defined, defaulting to INSTALLDIRS=site
+
+pure_install :: pure_$(INSTALLDIRS)_install
+
+doc_install :: doc_$(INSTALLDIRS)_install
+ @echo Appending installation info to $(INSTALLARCHLIB)/perllocal.pod
+
+pure__install : pure_site_install
+ @echo INSTALLDIRS not defined, defaulting to INSTALLDIRS=site
+
+doc__install : doc_site_install
+ @echo INSTALLDIRS not defined, defaulting to INSTALLDIRS=site
+
+pure_perl_install ::
+ @$(MOD_INSTALL) \
+ read $(PERL_ARCHLIB)/auto/$(FULLEXT)/.packlist \
+ write $(INSTALLARCHLIB)/auto/$(FULLEXT)/.packlist \
+ $(INST_LIB) $(INSTALLPRIVLIB) \
+ $(INST_ARCHLIB) $(INSTALLARCHLIB) \
+ $(INST_BIN) $(INSTALLBIN) \
+ $(INST_SCRIPT) $(INSTALLSCRIPT) \
+ $(INST_MAN1DIR) $(INSTALLMAN1DIR) \
+ $(INST_MAN3DIR) $(INSTALLMAN3DIR)
+ @$(WARN_IF_OLD_PACKLIST) \
+ $(SITEARCHEXP)/auto/$(FULLEXT)
+
+
+pure_site_install ::
+ @$(MOD_INSTALL) \
+ read $(SITEARCHEXP)/auto/$(FULLEXT)/.packlist \
+ write $(INSTALLSITEARCH)/auto/$(FULLEXT)/.packlist \
+ $(INST_LIB) $(INSTALLSITELIB) \
+ $(INST_ARCHLIB) $(INSTALLSITEARCH) \
+ $(INST_BIN) $(INSTALLBIN) \
+ $(INST_SCRIPT) $(INSTALLSCRIPT) \
+ $(INST_MAN1DIR) $(INSTALLMAN1DIR) \
+ $(INST_MAN3DIR) $(INSTALLMAN3DIR)
+ @$(WARN_IF_OLD_PACKLIST) \
+ $(PERL_ARCHLIB)/auto/$(FULLEXT)
+
+doc_perl_install ::
+ -@$(DOC_INSTALL) \
+ "Module" "$(NAME)" \
+ "installed into" "$(INSTALLPRIVLIB)" \
+ LINKTYPE "$(LINKTYPE)" \
+ VERSION "$(VERSION)" \
+ EXE_FILES "$(EXE_FILES)" \
+ >> $(INSTALLARCHLIB)/perllocal.pod
+
+doc_site_install ::
+ -@$(DOC_INSTALL) \
+ "Module" "$(NAME)" \
+ "installed into" "$(INSTALLSITELIB)" \
+ LINKTYPE "$(LINKTYPE)" \
+ VERSION "$(VERSION)" \
+ EXE_FILES "$(EXE_FILES)" \
+ >> $(INSTALLARCHLIB)/perllocal.pod
+
+
+uninstall :: uninstall_from_$(INSTALLDIRS)dirs
+
+uninstall_from_perldirs ::
+ @$(UNINSTALL) $(PERL_ARCHLIB)/auto/$(FULLEXT)/.packlist
+
+uninstall_from_sitedirs ::
+ @$(UNINSTALL) $(SITEARCHEXP)/auto/$(FULLEXT)/.packlist
+
+
+# --- MakeMaker force section:
+# Phony target to force checking subdirectories.
+FORCE:
+ @$(NOOP)
+
+
+# --- MakeMaker perldepend section:
+
+
+# --- MakeMaker makefile section:
+
+# We take a very conservative approach here, but it\'s worth it.
+# We move Makefile to Makefile.old here to avoid gnu make looping.
+Makefile : Makefile.PL $(CONFIGDEP)
+ @echo "Makefile out-of-date with respect to $?"
+ @echo "Cleaning current config before rebuilding Makefile..."
+ -@$(RM_F) Makefile.old
+ -@$(MV) Makefile Makefile.old
+ -$(MAKE) -f Makefile.old clean $(DEV_NULL) || $(NOOP)
+ $(PERL) "-I$(PERL_ARCHLIB)" "-I$(PERL_LIB)" Makefile.PL
+ @echo "==> Your Makefile has been rebuilt. <=="
+ @echo "==> Please rerun the make command. <=="
+ false
+
+# To change behavior to :: would be nice, but would break Tk b9.02
+# so you find such a warning below the dist target.
+#Makefile :: $(VERSION_FROM)
+# @echo "Warning: Makefile possibly out of date with $(VERSION_FROM)"
+
+
+# --- MakeMaker staticmake section:
+
+# --- MakeMaker makeaperl section ---
+MAP_TARGET = perl
+FULLPERL = /usr/local/bin/perl
+
+$(MAP_TARGET) :: static $(MAKE_APERL_FILE)
+ $(MAKE) -f $(MAKE_APERL_FILE) $@
+
+$(MAKE_APERL_FILE) : $(FIRST_MAKEFILE)
+ @echo Writing \"$(MAKE_APERL_FILE)\" for this $(MAP_TARGET)
+ @$(PERL) -I$(INST_ARCHLIB) -I$(INST_LIB) -I$(PERL_ARCHLIB) -I$(PERL_LIB) \
+ Makefile.PL DIR= \
+ MAKEFILE=$(MAKE_APERL_FILE) LINKTYPE=static \
+ MAKEAPERL=1 NORECURS=1 CCCDLFLAGS=
+
+
+# --- MakeMaker test section:
+
+TEST_VERBOSE=0
+TEST_TYPE=test_$(LINKTYPE)
+TEST_FILE = test.pl
+TEST_FILES = t/*.t
+TESTDB_SW = -d
+
+testdb :: testdb_$(LINKTYPE)
+
+test :: $(TEST_TYPE)
+
+test_dynamic :: pure_all
+ PERL_DL_NONLAZY=1 $(FULLPERL) -I$(INST_ARCHLIB) -I$(INST_LIB) -I$(PERL_ARCHLIB) -I$(PERL_LIB) -e 'use Test::Harness qw(&runtests $$verbose); $$verbose=$(TEST_VERBOSE); runtests @ARGV;' $(TEST_FILES)
+
+testdb_dynamic :: pure_all
+ PERL_DL_NONLAZY=1 $(FULLPERL) $(TESTDB_SW) -I$(INST_ARCHLIB) -I$(INST_LIB) -I$(PERL_ARCHLIB) -I$(PERL_LIB) $(TEST_FILE)
+
+test_ : test_dynamic
+
+test_static :: test_dynamic
+testdb_static :: testdb_dynamic
+
+
+# --- MakeMaker ppd section:
+# Creates a PPD (Perl Package Description) for a binary distribution.
+ppd:
+ @$(PERL) -e "print qq{<SOFTPKG NAME=\"RTF-Parser\" VERSION=\"1,07,0,0\">\n}. qq{\t<TITLE>RTF-Parser</TITLE>\n}. qq{\t<ABSTRACT></ABSTRACT>\n}. qq{\t<AUTHOR></AUTHOR>\n}. qq{\t<IMPLEMENTATION>\n}. qq{\t\t<OS NAME=\"$(OSNAME)\" />\n}. qq{\t\t<CODEBASE HREF=\"\" />\n}. qq{\t</IMPLEMENTATION>\n}. qq{</SOFTPKG>\n}" > RTF-Parser.ppd
+
+# --- MakeMaker pm_to_blib section:
+
+pm_to_blib: $(TO_INST_PM)
+ @$(PERL) "-I$(INST_ARCHLIB)" "-I$(INST_LIB)" \
+ "-I$(PERL_ARCHLIB)" "-I$(PERL_LIB)" -MExtUtils::Install \
+ -e "pm_to_blib({qw{$(PM_TO_BLIB)}},'$(INST_LIB)/auto')"
+ @$(TOUCH) $@
+
+
+# --- MakeMaker selfdocument section:
+
+
+# --- MakeMaker postamble section:
+
+
+# End.
@@ -1,12 +1,17 @@
+#
+# Makefile.PL for RTF::Parser distribution
+#
+# Filename: Makefile.PL
+# Created: 21 October 1998
+#
use ExtUtils::MakeMaker;
&WriteMakefile(
NAME => 'RTF::Parser',
- EXE_FILES => [ 'bin/rtf2html', 'bin/rtf2text' ],
+ EXE_FILES => [ 'rtf2html', 'rtf2text' ],
DISTNAME => 'RTF-Parser',
VERSION_FROM => 'lib/RTF/Parser.pm',
- PREREQ_PM => { 'RTF::Tokenizer' => '1.04' },
dist => {COMPRESS => 'gzip', SUFFIX => 'gz', },
);
@@ -1,16 +1,40 @@
-RTF::Parser
------------
- Please note this is a beta release from a new maintainer. The original
-release had no documentation or tests - I hope to get around to writing
-those in time.
+ RTF Processor V 1.07 - ALPHA
- The rtf2html program, as the original author states, is just a an
-example, not a fully-fledged application.
- +Pete Sergeant
+The RTF package set is a minimal RTF processor. This set is provided
+with an RTF to HTML converter. This converter is just an example, not
+a full-fledged application.
- rtf-parser@clueball.com
+You can try this converter the RTF specification (see the "Ressources"
+section). For example:
+
+ % rtf2html RTF-Spec-1_5.rtf > RTF-Spec-1_5.htm
+
+Philippe Verdret
+pverdret@sonovision-itep.fr
+
+__________________
+Module description
+
+Name DSLI Description Info
+
+------------- ---- -------------------------------------------- -----
+
+RTF::Parser adpO Base class for parsing RTF files PVERD
+
+
+____
+TODO
+
+- write a documentation
+
+__________
+Ressources
+
+Specification for RTF and filters are available at:
+- <ftp://ftp.primate.wisc.edu/pub/RTF>
+
+General information on converters:
+- <http://www.kfa-juelich.de/isr/1/texconv.html>
- Work on RTF processing modules is being carried out under a grant from
-The Perl Foundation.
@@ -1,66 +0,0 @@
-#!/usr/local/bin/perl
-# Sonovision-Itep, Verdret 1995-1999
-
-require 5.004;
-use strict;
-
-my $VERSION = "1.07";
-
-use Getopt::Long;
-use File::Basename;
-
-use vars qw/$BASENAME $DIRNAME/;
-BEGIN {
- ($BASENAME, $DIRNAME) = fileparse($0);
-}
-use lib "$DIRNAME/lib";
-
-my $usage = "usage:
- -h print this help
- -l log_file RTF_file process RTF_file and generate a log file
- -V print version number
-";
-my $help = "";
-
-use vars qw($EOM $trace);
-$trace = 0;
-$EOM = "\n"; # end of message
-
-use RTF::Config;
-
-die "$usage" unless @ARGV;
-use vars qw($trace $opt_d $opt_h $opt_t $opt_v $opt_V);
-{ local $SIG{__WARN__} = sub {};
- GetOptions('h', # Help
- 't=s', # name of the target document
- 'r=s', # name of the report file
- 'd', # debugging mode
- 'v', # verbose
- 'V', # print version number
- 'l=s' => \$LOG_FILE, # -l logfile
- ) or die "$usage$EOM";
-}
-
-if ($opt_h) {
- print STDOUT "$help\n";
- exit 0;
-}
-if ($opt_V) {
- print STDOUT "$VERSION\n";
- exit 0;
-}
-if ($opt_d) {
- $| = 1;
- $EOM = "";
-}
-
-select(STDOUT);
-
-require RTF::HTML::Converter;
-my $self = new RTF::HTML::Converter(Output => \*STDOUT); # actually the default
-
-foreach my $filename (@ARGV) {
- $self->parse_stream($filename);
-}
-
-1;
@@ -1,66 +0,0 @@
-#!/usr/local/bin/perl
-# Sonovision-Itep, Verdret 1995-1999
-
-require 5.004;
-use strict;
-
-my $VERSION = "1.03";
-
-use Getopt::Long;
-use File::Basename;
-
-use vars qw/$BASENAME $DIRNAME/;
-BEGIN {
- ($BASENAME, $DIRNAME) = fileparse($0);
-}
-use lib "$DIRNAME/lib";
-
-my $usage = "usage:
- -h print this help
- -l log_file RTF_file process RTF_file and generate a log file
- -V print version number
-";
-my $help = "";
-
-use vars qw($EOM $trace);
-$trace = 0;
-$EOM = "\n"; # end of message
-
-use RTF::Config;
-
-die "$usage" unless @ARGV;
-use vars qw($trace $opt_d $opt_h $opt_t $opt_v $opt_V);
-{ local $SIG{__WARN__} = sub {};
- GetOptions('h', # Help
- 't=s', # name of the target document
- 'r=s', # name of the report file
- 'd', # debugging mode
- 'v', # verbose
- 'V', # print version number
- 'l=s' => \$LOG_FILE, # -l logfile
- ) or die "$usage$EOM";
-}
-
-if ($opt_h) {
- print STDOUT "$help\n";
- exit 0;
-}
-if ($opt_V) {
- print STDOUT "$VERSION\n";
- exit 0;
-}
-if ($opt_d) {
- $| = 1;
- $EOM = "";
-}
-
-select(STDOUT);
-
-require RTF::TEXT::Converter;
-my $self = new RTF::TEXT::Converter(Output => \*STDOUT); # actually the default
-
-foreach my $filename (@ARGV) {
- $self->parse_stream($filename);
-}
-
-1;
@@ -1,58 +0,0 @@
-
-=head1 RTF Overview
-
-RTF is a format that can express complex formatted documents in ASCII.
-This document expects that you have some familiarity with it, and are
-familiar with the terms 'text field', 'control word', and 'group' as
-they apply to RTF.
-
-Sean Burke has very usefully written:
-
-An online introduction to RTF
-
-L<http://search.cpan.org/~sburke/RTF-Writer/lib/RTF/Cookbook.pod>
-
-The RTF Pocket Guide
-
-L<http://www.oreilly.com/catalog/rtfpg/>
-
-=head1 What the different modules do
-
-=head2 RTF::Parser
-
-L<RTF::Parser> is the foundation class. It's essentially a wrapper
-for <RTF::Tokenizer> that executes events that you define as it comes
-across different parts of the RTF document - it has handlers for
-when groups open, when they close, when we come across a control word,
-and so on.
-
-=head2 RTF::Control
-
-L<RTF::Control> sub-classes L<RTF::Parser>. It defines a number of events
-for which you can write handlers that are specifically geared towards
-people who want to write converters - where RTF::Parser things in terms
-of control words and groups, RTF::Control thinks in terms of paragraphs,
-and formatting properties for text.
-
-=head2 RTF::TEXT::Converter, RTF::HTML::Converter
-
-L<RTF::TEXT::Converter> and L<RTF::HTML::Converter> sub-class L<RTF::Control>,
-and are essentially documents that specify how to react to RTF::Control's
-events to spit RTF out as text and HTML respectively. You can also use them
-to inline RTF conversion to text or HTML in your documents.
-
-=head1 AUTHOR
-
-Peter Sergeant C<rtf.parser@clueball.com>
-
-=head1 COPYRIGHT
-
-Copyright 2004 B<Pete Sergeant>.
-
-This documentation is free documentation; you can redistribute it and/or modify it under
-the same terms as Perl itself.
-
-=head1 CREDITS
-
-This work was carried out under a grant generously provided by The Perl Foundation -
-give them money!
@@ -1,135 +0,0 @@
-
-*************** IMPORTANT ******************
-
-These are rough notes of mine on what happens
-when you run rtf2html. They're not yet ready
-for human consumption. Please ignore lack of
-continuity and sanity.
-
-*************** IMPORTANT ******************
-
-
-
-rtf2html - takes the options and the filenames, and then
-loads RTF::HTML::Convertor. It sets the output channel to
-STDOUT (which it claims to default to anyway), and then
-loops over the filenames it was given and invokes
-the 'parse_stream( $filename )' method on each file...
-
-RTF::HTML::Convertor is a subclass of RTF::Control which
-is a subclass of RTF::Parser, which is a wrapper around
-RTF::Tokenizer. :-)
-
-RTF::HTML::Convertor sets definitions for the basic RTF::Parser
-API - overriding some subs. Specifically, it over-rides:
-
- text()
-
- symbol()
-
- %symbol -- defines a lot of this
-
- %do_on_control{'ansi'}
-
- %do_on_event -- lots of this
-
-RTF::Control, as previously mentioned, is a subclass of
-RTF::Parser, and as such, provides a number of default
-over-rides for RTF::Parser's API, specifically to help
-people building modules like RTF::HTML::Convertor.
-
-It exports:
-
- new() - a wrapper of RTF::Parser's new, with some extra option handling
- output - a function to output text to the right place
- %symbol - a symbol transliteration hash: e.g. $symbol{'~'} = ' ';
- %info - meta-data about the document that it parses out for you
- %do_on_event - things to do when certain abstract things happen (we encounter a table)
- %do_on_control - what to do when we encounter a specific control word
- %par_props - properties set for the current paragraph
- $style - the style we're currently using
- $newstyle - the style we're changing into, if we're about to change
- $event - current event
- $text - pending text to print
-
-as well as all the things that RTF::Parser exports, so,
-essentially:
-
- parse_start()
- parse_end()
- group_end()
- group_start()
- text()
- char()
- symbol()
-
-So now we're going to follow exactly what happens when we
-step through a really simple RTF document and convert it to
-HTML using the rtf2html tool.
-
-rtf2html reads in the filename, selects STDOUT as the default
-output mode, and creates a new RTF::HTML::Converter object.
-
-RTF::HTML::Converter doesn't define a new() method, so RTF::Control's
-one is used. RTF::Control's constructor method is simple - it invokes
-RTF::Parser's constructor method, and then invokes the _configure method
-(in RTF::Control) on this object. RTF::Parser::new() is also pretty
-simple - it sets some options depending on if RTF::Control is being
-used, and returns an object.
-
-It's on this object then that RTF::Parser::_configure() is called.
-_configure() reads in options the user passed to the constructor -
-at the moment, the only one recognised is 'output', which, if set,
-passes the value of 'output' to RTF::Control::set_top_output_to().
-
-RTF::Control::set_top_output_to() checks to see if it's been passed
-a filehandle or a reference to a scalar. Depending on which it's been passed, it
-over-rides the behaviour of the sub RTF::Control::flush_top_output
-to either print the top item of @RTF::Control::output_stack to said
-filehandle, or add it to the scalar referenced.
-
-rtf2html then runs the parse_stream method on our RTF::HTML::Converter
-object.
-
-This creates an RTF::Tokenizer object from our filehandle, and executes
-RTF::Parser::_parse(). parse_start() is called, which, in this case,
-is RTF::Control::parse_start(). This executes RTF::Control::push_output(),
-which adds a new and empty element to our output stack. This in turn
-executes the code reference in $RTF::HTML::Converter::do_on_event{'document'},
-which, as we've passed a 'start' event to it, calls RTF::Control::output() with
-a doctype to print. This adds that doctype to the current element at the end
-of our output stack. RTF::Control::flush_top_output() is then called, which
-prints the doctype to STDOUT. We call push_output again to add another blank
-element to our output stack, and return to RTF::Parser::_parse().
-
-We then enter the loop around all the tokens. The first token we come across
-opens a group, so $self->group_start is called, which in this case is
-RTF::Control::group_start(). Here we add paragraph properties, and character
-properties currently in force (although because we're just starting, there
-are none) to their respective stacks, and add a holder to @control to keep
-track of any controls we're going to open.
-
-The next token is a control word, 'rtf', with the argument 1. There's a
-%do_on_control entry defined in RTF::Control for 'rtf', so we execute that.
-It calls RTF::Control::push_output with 'nul', which over-rides our output()
-function to not do anything. It then sets 'rtf' to be an open control in
-@control (so that when we leave this group, it executes the %do_on_control
-entry for 'rtf', only with 'end' rather than 'start').
-
- \ansi
-
- This is defined in do_on_control in RTF::HTML::Converter. It does a lot
- of scary character-set stuff. Wibble.
-
- \deff0
-
- Ignored, because we don't have a handler
-
- \fonttbl
-
-
-
-
-
-
-
@@ -1,529 +1,190 @@
# Sonovision-Itep, Philippe Verdret 1998-1999
-# TPF - Pete Sergeant 2003 - 2004
-
-
-
-
-
-=head1 NAME
-
-RTF::Control - Application of RTF::Parser for document conversion
-
-=head1 DESCRIPTION
-
-Application of RTF::Parser for document conversion
-
-=head1 OVERVIEW
-
-L<RTF::Control> is a sublass of L<RTF::Parser>. L<RTF::Control> can be seen as
-a helper module for people wanting to write their own document convertors -
-L<RTF::HTML::Convertor> and L<RTF::TEXT::Convertor> both subclass it.
-
-I am the new maintainer of this module. My aim is to keep the interface
-identical to the old interface while cleaning up, documenting, and testing
-the internals. There are things in the interface I'm unhappy with, and things
-I like - however, I'm maintaining rather than developing the module, so, the
-interface is mostly frozen.
-
-=head1 HOW IT ALL WORKS
-
-For starters, go and look at the source of M<RTF::TEXT::Convertor>
-
-
-Except for B<RTF::Parser subs>, the following is a list of variables
-exported by RTF::Control that you're expected to tinker with in your
-own subclass.
-
-=head2 RTF::Parser subs
-
-If you read the docs of RTF::Parser you'll see that you can redefine some
-subs there - RTF::Control has its own definitions for all of these, but you
-might want to over-ride C<symbol()>, C<text()>, and C<char()>. We'll look
-at what the defaults of each of these do, and what you need to do if you
-want to override any of them a little further down.
-
-=head2 %symbol
-
-This hash is actually merged into %do_on_control, with the value wrapped in
-a subroutine that effectively says C<print shift>. You can put any control
-words that should map directly to a certain output in here - C<\tab>, for
-example could be C<$symbol{'tab'} = "\t">.
-
-=head2 %info
-
-This hash gets filled with document meta-data, as per the RTF specification.
-
-=head2 %par_props
-
-Not really sure, but paragraph properties
-
-=head2 %do_on_event %do_on_control
-
-%do_on_control tells us what to do when we meet a specific control word.
-The values are coderefs. %do_on_event also holds coderefs, but these are
-more abstract things to do, say when the stylesheet changes. %do_on_event
-thingies tend to be called by %do_on_control thingies, as far as I can tell.
-
-=head2 $style $newstyle
-
-Style is the current style, $newstyle is the one we're about to
-change to if we're about to change...
-
-=head2 $event
-
-Current event
-
-=head2 $text
-
-Pending text
-
-=cut
-
-# Define all our dependencies and other fluff
-
- use strict;
-
- require 5.003;
- package RTF::Control;
-
- use RTF::Parser;
- use RTF::Config;
- use RTF::Charsets; # define names of chars
-
- use File::Basename;
- use Exporter;
-
-# I'm an RTF::Parser! I'm an Exporter! I'm a class!
-# "When I grow up, I'm going to bovine university!"
-
- @RTF::Control::ISA = qw(Exporter RTF::Parser);
-
-# Define the symbols we'll be exporting - these are
-# documented in the API part of the POD and a little
-# further down
-
- use vars qw(
-
- %symbol %info %par_props %do_on_event %do_on_control
- $style $newstyle $event $text
-
- );
-
-# There are used to that we have named arguments for callbacks
-# I don't like this, but hey, I didn't design it, and I'm meant
-# to be maintaining the interface :-)
-
- use constant SELF => 0; # rtf processor instance
- use constant CONTROL => 1; # control word
- use constant ARG => 2; # associated argument
- use constant EVENT => 3; # start/end event
- use constant TOP => -1; # access to the TOP element of a stack
-
-
-# Actually export stuff...
-
- @RTF::Control::EXPORT = qw(
-
- output
-
- %symbol
- %info
- %do_on_event
- %do_on_control
- %par_props
-
- $style
- $newstyle
- $event
- $text
-
- SELF
- CONTROL
- ARG
- EVENT
- TOP
-
- );
-
-# Flags to specify where we are... Because this is all undocumented
-# I feel justified putting these into a hash at some point in the
-# near future... They also shouldn't be package variables, they
-# should be class variables, but, to be honest, trying to rid this
-# module of package variables seems an exercise in futility if I'm
-# actually trying to maintain the interface... I could internalise
-# everything that isn't part of the API though
-
- my $IN_STYLESHEET = 0; # inside or outside style table
- my $IN_FONTTBL = 0; # inside or outside font table
- my $IN_TABLE = 0;
-
-# Declare where we're going to be holding meta-data etc
- my %fonttbl;
- my %stylesheet;
- my %colortbl;
-
-# Property stacks
- my @par_props_stack = (); # stack of paragraph properties
- my @char_props_stack = (); # stack of character properties
- my @control = (); # stack of control instructions, rename control_stack
-
-# Some other stuff
- my $stylename = '';
- my $cstylename = ''; # previous encountered style
- my $cli = 0; # current line indent value
- my $styledef = '';
-
+#
+# Stack machine - must be application independant!
+#
+# defined some interesting events for your application
+# an application can redefine its own control callbacks if %do_on_control is exported
+
+# todo:
+# - output well-formed HTML
+# - better list processing
+# - process fields and bookmarks
+
+use strict;
+require 5.003;
+package RTF::Control;
+use RTF::Parser;
+use RTF::Config;
+use RTF::Charsets; # define names of chars
+
+use File::Basename;
+use Exporter;
+@RTF::Control::ISA = qw(Exporter RTF::Parser);
+
+ # here is what you can use in your application
+use vars qw(%symbol %info %do_on_event %par_props
+ %do_on_control
+ $style $newstyle $event $text
+ );
+###########################################################################
+ # Specification of the callback interface
+ # so you can easily reorder sub arguments
+use constant SELF => 0; # rtf processor instance
+use constant CONTROL => 1; # control word
+use constant ARG => 2; # associated argument
+use constant EVENT => 3; # start/end event
+use constant TOP => -1; # access to the TOP element of a stack
+###########################################################################
+ # symbols to export in the application layer
+@RTF::Control::EXPORT = qw(output
+ %symbol %info %do_on_event
+ %do_on_control
+ %par_props
+ $style $newstyle $event $text
+ SELF CONTROL ARG EVENT TOP
+ );
+###########################################################################
-=head2 new
-
-Returns an RTF::Control object. RTF::Control is a subclass of RTF::Parser.
-Internally, we call RTF::Parser's new() method, and then we call an internal
-method called _configure(), which takes care of options we were passed.
+%do_on_event = (); # output routines
+$style = ''; # current style
+$newstyle = ''; # new style if style changing
+$event = ''; # start or end
+$text = ''; # pending text
+%symbol = (); # symbol translations
+%info = (); # info part of the document
+%par_props = (); # paragraph properties
+###########################################################################
+ # Automata states, control modes
+my $IN_STYLESHEET = 0; # inside or outside style table
+my $IN_FONTTBL = 0; # inside or outside font table
+my $IN_TABLE = 0;
+
+my %fonttbl;
+my %stylesheet;
+my %colortbl;
+my @par_props_stack = (); # stack of paragraph properties
+my @char_props_stack = (); # stack of character properties
+my @control = (); # stack of control instructions, rename control_stack
+my $stylename = '';
+my $cstylename = ''; # previous encountered style
+my $cli = 0; # current line indent value
+my $styledef = '';
-ADD STUFF ON -output AND -confdir
-
-=cut
-
+###########################################################################
+ # Added methods
sub new {
-
- my $proto = shift;
- my $class = ref( $proto ) || $proto;
-
- my $self = $class->SUPER::new(@_);
-
- $self->_configure(@_);
-
- return $self;
-
-}
-
-# This is a private method. It accepts a hash (well, a list)
-# of values, and stores them. If one of them is 'output',
-# it calls a function I'm yet to examine. This was done
-# in a horrendous way - it's now a lot tidier. :-)
-
-sub _configure {
-
- my $self = shift;
-
- my %options = @_;
-
- # Sanitize the options
- my %clean_options;
- for my $key ( keys %options ) {
-
- my $oldkey = $key;
-
- $key =~ s/^-//;
- $key = lc($key);
-
- $clean_options{ $key } = $options{ $oldkey }
-
- }
-
- $self->{'_RTF_Control_Options'} = \%clean_options;
-
- $self->set_top_output_to( $clean_options{'output'} )
- if $clean_options{'output'};
-
- return $self;
-
-}
+ my $receiver = shift;
+ my $self = $receiver->SUPER::new(@_);
+ $self->configure(@_);
+}
+sub configure {
+ my $self = shift;
+ unless (@_ >= 2) { }
+ my ($key, $value);
+ while (@_ >= 2) {
+ ($key, $value) = (shift, shift);
+ if ($key =~ /^-?[Oo]utput$/) {
+ set_top_output_to($value);
+ $self->{$key} = $value;
+ } else {
+ last;
+ }
+ }
+ $self;
+}
use constant APPLICATION_DIR => 0;
-
-=head2 application_dir
-
-I'm leaving this method in because removing it will cause a backward-compatability
-nightmare. This method returns the ( wait for it ) path that the .pm file corresponding
-to the class that the object is contained, without a trailing semi-colon. Obviously
-this is nasty in several ways. If you've set C<-confdir> in C<new()> that will be
-returned instead. You should definitely take that route if you're on an OS on which
-Perl can't use / as a directory seperator.
-
-=cut
-
sub application_dir {
-
- # Grab our object
- my $self = shift;
-
- # Return -confdir if set
- return $self->{'_RTF_Control_Options'}->{'confdir'}
- if $self->{'_RTF_Control_Options'}->{'confdir'};
-
- # Grab the class name
- my $class = ref $self;
-
- # Clean it up and look it up in %INC
- $class =~ s|::|/|g;
- $class = $INC{"$class.pm"};
-
- return dirname $class;
-
-}
-
-=head2 charmap_reader
-
-This nicely abstracts away using application_dir and so on. It's a method
-call. It'll take the name of the class, and an argument for the module/file
-it's looking for. This is likely to be 'ansi' or 'charmap'. This argument,
-for historical reasons (ho ho ho) will have any _'s removed in the check for
-a module name ... C< $self->charmap_reader('char_map') > will thus look for, for
-example, C< RTF::TEXT::charmap > to load. It'll return the data in the file as
-an array of lines. This description sucks.
-
-=cut
-
-sub charmap_reader {
-
- my $self = shift;
- my $file = shift;
-
- my @char_map_data;
-
- # Try and work out what our character set module would be called...
- my $module_file = $file;
- $module_file =~ s/_//g;
- my $module_name = ref( $self ) . '::' . $module_file;
-
- # Can we load it?
- eval "use $module_name";
-
- # That would be a no...
- if ($@) {
-
- # Create a path for the charset file using the old method...
- my $charset_file = $_[SELF]->application_dir(__FILE__) . "/$file";
-
- # Try and open it...
- open( CHAR_MAP, "< $charset_file" ) or die
- "Unable to open the charset file '$charset_file': $!";
-
- # Read in the data...
- @char_map_data = (<CHAR_MAP>);
-
- # Why yes, yes we can...
- } else {
-
- my $sub_name = $module_name . '::' . 'data';
- @char_map_data = main->$sub_name();
-
- }
-
- return @char_map_data;
-
+ my $class = ref $_[SELF];
+ my $file;
+ ($file = $class) =~ s|::|/|g;
+ $file = $INC{"$file.pm"};
+ my $dirname;
+ if (-f $file) {
+ $dirname = dirname $file;
+ } else {
+ $dirname = dirname '.' . $file;
+ }
+ "$dirname";
}
-
###########################################################################
-
-# This stuff is all to do with the stack, and I'm not really sure how it
-# works. I'm hoping it'll become more obvious as I go. The routines themselves
-# are now all documented, but who knows what the stack is or why? hrm?
-
-
-# This hurts my little brane.
-
-# Holds the output stack
+ # Utils
+ # output stack management
my @output_stack;
-
-# Defines how large the output stack can be
-use constant MAX_OUTPUT_STACK_SIZE => 0; # PV: 8 seems a good value
- # PS: Then why not default it to that?
-
-=head1 Stack manipulation
-
-=head2 dump_stack
-
-Serializes and prints the stack to STDERR
-
-=cut
-
-# Serializes the stack, and prints it to STDERR.
+use constant MAX_OUTPUT_STACK_SIZE => 0; # 8 seems a good value
sub dump_stack {
-
- my $stack_size = @output_stack;
-
- print STDERR "Stack size: $stack_size\n";
-
- print STDERR $stack_size-- . " |$_|\n" for reverse @output_stack;
-
-}
-
-=head2 output
-
-Holder routine for the current thing to do with output text we're given.
-It starts off as the same as C<$string_output_sub>, which adds the string
-to the element at the C<TOP> of the output stack. However, the idea, I
-believe, is to allow that to be changed at will, using C<push_output>.
-
-=cut
-
-sub output {
-
- $output_stack[TOP] .= $_[0]
-
+ local($", $\) = ("\n") x 2;
+ my $i = @output_stack;
+ print STDERR "Stack size: $i";
+ print STDERR map { $i-- . " |$_|\n" } reverse @output_stack;
}
-
-# I'm guessing (because I'm generous ;-) that this is done because
-# subclasses might want to modifiy the values of these. These are
-# obviously the two different ways to spit out ... something. We
-# start with the string_output_sub being what &output does tho.
-
-my $nul_output_sub = sub {
- #print STDERR "** $_[0] **\n";
-};
-
-my $string_output_sub = sub {
-
- $output_stack[TOP] .= $_[0] if $_[0];
-
-};
-
-=head2 push_output
-
-Adds a blank element to the end of the stack. It will change (or
-maintain) the function of C<output> to be C<$string_output_sub>,
-unless you pass it the argument C< 'nul' >, in which case it will
-set C<output> to be C<$nul_output_sub>.
-
-=cut
-
+my $nul_output_sub = sub {};
+my $string_output_sub = sub { $output_stack[TOP] .= $_[0] };
+sub output { $output_stack[TOP] .= $_[0] }
sub push_output {
+ if (MAX_OUTPUT_STACK_SIZE) {
+ die "max size of output stack exceeded" if @output_stack == MAX_OUTPUT_STACK_SIZE;
+ }
- # If we've set a maximum output stack and then exceeded it, complain.
- if (MAX_OUTPUT_STACK_SIZE) {
- die "max size of output stack exceeded" if @output_stack == MAX_OUTPUT_STACK_SIZE;
- }
-
- # If we didn't get an argument, output becomes string...
- unless (defined($_[0])) {
-
- local $^W = 0;
- *output = $string_output_sub;
-
- # If we were given 'nul', set output to $nul_output_sub
- } elsif ($_[0] eq 'nul') {
-
- local $^W = 0;
- *output = $nul_output_sub;
-
- }
-
- # Add an empty element to the end of the ouput stack
- push @output_stack, '';
-
-}
-
-=head2 pop_output
-
-Removes and returns the last element of the ouput stack
-
-=cut
-
-# Remove and return an element of the output stack, which
-# should basically be the in-scope text... See how &do_on_info
-# uses this
-
-sub pop_output {
+ unless (defined($_[0])) {
+ local $^W = 0;
+ *output = $string_output_sub;
+ } elsif ($_[0] eq 'nul') {
+ local $^W = 0;
+ *output = $nul_output_sub;
+ }
- pop @output_stack;
-
+ push @output_stack, '';
}
-
+sub pop_output { pop @output_stack; }
use constant SET_TOP_OUTPUT_TO_TRACE => 0;
-
-=head2 set_top_output_to
-
-Only called at init time, is a method call not a function.
-Sets the action of C<flush_top_output>, depending on whether
-you pass it a filehandle or string reference.
-
-=cut
-
-# Sets flush_top_output to print to the appropriate thingy
sub set_top_output_to {
-
- my $self = shift;
-
- # Are we being passed a filehandle?
-
- local *X = $_[0];
-
- if (fileno X) {
-
- my $stream = *X;
-
- # Debugging info if asked for
- print STDERR "stream: ", fileno X, "\n" if SET_TOP_OUTPUT_TO_TRACE;
-
- # Turn off warnings
- local $^W = 0;
-
- # Overwrite &flush_top_output
- *flush_top_output = sub {
- print $stream $output_stack[TOP];
- $output_stack[TOP] = '';
- };
-
- # We've been passed a reference to a scalar...
-
- } elsif (ref $_[0] eq 'SCALAR') {
-
- print STDERR "output to string\n" if SET_TOP_OUTPUT_TO_TRACE;
-
- my $content_ref = $_[0];
-
- local $^W = 0;
-
- *flush_top_output = sub {
- $$content_ref .= $output_stack[TOP];
- $output_stack[TOP] = '';
- };
-
- # Someone's done something weird
-
- } else {
-
- warn "unknown output specification: $_[0]\n";
-
- }
+ local *X = $_[0];
+ if (fileno X) { # set_top_output_to(\*FH)
+ # is there a better way to do this?
+ my $stream = *X;
+ print STDERR "stream: ", fileno X, "\n" if SET_TOP_OUTPUT_TO_TRACE;
+ local $^W = 0;
+ *flush_top_output = sub {
+ print $stream $output_stack[TOP];
+ $output_stack[TOP] = '';
+ };
+ } elsif (ref $_[0] eq 'SCALAR') { # set_top_output_to(\$string)
+ print STDERR "output to string\n" if SET_TOP_OUTPUT_TO_TRACE;
+ my $content_ref = $_[0];
+ local $^W = 0;
+ *flush_top_output = sub {
+ $$content_ref .= $output_stack[TOP];
+ $output_stack[TOP] = '';
+ };
+ } else {
+ warn "unknown output specification: $_[0]\n";
+ }
}
-
# the default prints on the selected output filehandle
-
-=head2 flush_top_output
-
-Output the top element of the stack in the way specified by the call
-to C<set_top_output_to>
-
-=cut
-
sub flush_top_output {
-
print $output_stack[TOP];
$output_stack[TOP] = '';
-
}
-
-
+#sub print_output_stack {
+# if (@output_stack) {
+# print @output_stack;
+# @output_stack = ();
+# } else {
+# warn "empty \@output_stack\n";
+# }
+#}
###########################################################################
# Trace management
-use constant RTF_DEBUG => 0;
+use constant DEBUG => 0;
use constant TRACE => 0;
use constant STACK_TRACE => 0; #
use constant STYLESHEET_TRACE => 0; # If you want to see the stylesheet of the document
use constant STYLE_TRACE => 0; #
use constant LIST_TRACE => 0;
-$| = 1 if TRACE or STACK_TRACE or RTF_DEBUG;
-
-# Debugging function - prints the number of _'s matching
-# the number of controls in our current control stack,
-# and anything else we were passed, and the $. - input
-# line number.
-
+$| = 1 if TRACE or STACK_TRACE or DEBUG;
sub trace {
#my(@caller) = (caller(1));
#my $sub = (@caller)[3];
@@ -535,90 +196,45 @@ sub trace {
$SIG{__DIE__} = sub {
require Carp;
Carp::confess;
-} if RTF_DEBUG;
+} if DEBUG;
###########################################################################
# Some generic routines
use constant DISCARD_CONTENT => 0;
-
-# This seems to be what we do when we hit a control word
-# we're not going to parse. He seems to be manually
-# implementing this some times - I wonder why?
-
-sub discard_content {
-
- # Read in information about the control word we hit
- my($control, $arg, $cevent) = ($_[CONTROL], $_[ARG], $_[EVENT]);
-
- trace "($_[CONTROL], $_[ARG], $_[EVENT])" if DISCARD_CONTENT;
-
- # This I don't understand. Presumably if we've hit 0, then it's
- # the close of a part of the document being dictated by a char
- # property, like, say, \b1I'm bold\b0 I'm not.
-
- if ($_[ARG] eq "0") {
-
- # Remove the last element on the output stack
- pop_output();
-
- # Set the property as on(?) on the control stack
- # This should probably be a 0. Something to test
- # later.
- $control[TOP]->{"$_[CONTROL]1"} = 1;
-
- # Add a blank element to the end of the output stack
- } elsif ($_[EVENT] eq 'start') {
-
- push_output();
- $control[TOP]->{"$_[CONTROL]$_[ARG]"} = 1;
-
- } elsif ($_[ARG] eq "1") {
- $cevent = 'start';
- push_output();
-
- } elsif ($_[EVENT] eq 'end') { # End of discard
-
- my $string = pop_output();
-
- if (length $string > 30) {
- $string =~ s/(.{1,10}).*(.{1,10})/$1 ... $2/;
- }
-
- trace "discard content of \\$control: $string" if DISCARD_CONTENT;
-
- } else {
-
- die "($_[CONTROL], $_[ARG], $_[EVENT])" if DISCARD_CONTENT;
-
- }
-
+sub discard_content {
+ my($control, $arg, $cevent) = ($_[CONTROL], $_[ARG], $_[EVENT]);
+ trace "($_[CONTROL], $_[ARG], $_[EVENT])" if DISCARD_CONTENT;
+ if ($_[ARG] eq "0") {
+ pop_output();
+ $control[TOP]->{"$_[CONTROL]1"} = 1;
+ } elsif ($_[EVENT] eq 'start') {
+ push_output();
+ $control[TOP]->{"$_[CONTROL]$_[ARG]"} = 1;
+ } elsif ($_[ARG] eq "1") { # see above
+ $cevent = 'start';
+ push_output();
+ } elsif ($_[EVENT] eq 'end') { # End of discard
+ my $string = pop_output();
+ if (length $string > 30) {
+ $string =~ s/(.{1,10}).*(.{1,10})/$1 ... $2/;
+ }
+ trace "discard content of \\$control: $string" if DISCARD_CONTENT;
+ } else {
+ die "($_[CONTROL], $_[ARG], $_[EVENT])" if DISCARD_CONTENT;
+ }
}
-
-# Document meta-data collator. Whenever we hit an info group,
-# this sub is called. All it does is put all the text 'in-scope'
-# into the %info hash...
-
-sub do_on_info {
-
- my $string;
-
- if ($_[EVENT] eq 'start') {
-
- push_output();
- $control[TOP]->{"$_[CONTROL]$_[ARG]"} = 1;
-
- } else {
-
- $string = pop_output();
- $info{"$_[CONTROL]$_[ARG]"} = $string;
-
- }
+sub do_on_info { # 'info' content
+ #my($control, $arg, $cevent) = ($_[CONTROL], $_[ARG], $_[EVENT]);
+ my $string;
+ if ($_[EVENT] eq 'start') {
+ push_output();
+ $control[TOP]->{"$_[CONTROL]$_[ARG]"} = 1;
+ } else {
+ $string = pop_output();
+ $info{"$_[CONTROL]$_[ARG]"} = $string;
+ }
}
-
-
-
-
# SYMBOLS
# default mapping for symbols
# char processed by the parser symbol() callback: - _ ~ : | { } * ' \\
@@ -639,16 +255,7 @@ $symbol{'tab'} = "\t";
$symbol{'line'} = "\n";
$symbol{'page'} = "\f";
-# Handler for symbols - prints the symbol corresponding
-# to our first argument...
-
-sub do_on_symbol {
-
- output $symbol{$_[CONTROL]};
-
-}
-
-
+sub do_on_symbol { output $symbol{$_[CONTROL]} }
my %symbol_ctrl = map { # install the do_on_symbol() routine
if (/^[a-z]+$/) {
$_ => \&do_on_symbol
@@ -657,25 +264,15 @@ my %symbol_ctrl = map { # install the do_on_symbol() routine
}
} keys %symbol;
-
-
-
-
###########################################################################################
my %char_props; # control hash must be declarated before install_callback()
# purpose: associate callbacks to controls
# 1. an hash name that contains the controls
# 2. a callback name
-
-# Sets the call back given as the second argument
-# as the %do_on_control for all controls currently
-# in %char_props. DON'T UNDERSTAND.
-
sub install_callback { # not a method!!!
-
my($control, $callback) = ($_[1], $_[2]);
no strict 'refs';
- unless ( %char_props ) { # why I can't write %{$control}
+ unless (defined(%char_props)) { # why I can't write %{$control}
die "'%$control' not defined";
}
for (keys %char_props) {
@@ -690,263 +287,139 @@ sub install_callback { # not a method!!!
#my %control_definition = ( # control => [default_value nassociated_callback]
# 'char_props' => qw(0 do_on_control),
# );
-
-
-
-# Remove character formatting properties ... there are actually more
-# character formatting properties defined in the RTF spec, but
-# these seem to be the ones supported by this module...
-
sub reset_char_props {
-
- %char_props = map {
-
- $_ => 0
-
- } qw(b i ul sub super strike);
+ %char_props = map {
+ $_ => 0
+ } qw(b i ul sub super strike);
}
-
-
my $char_prop_change = 0;
my %current_char_props = %char_props;
use constant OUTPUT_CHAR_PROPS => 0;
-
-# Force a START or END event on our current character
-# properties... This is a method call.
-
sub force_char_props { # force a START/END event
-
- # Obviously you're not allowed to do this in the fonttable
- # or style sheet...
-
- return if $IN_STYLESHEET or $IN_FONTTBL;
-
- trace "@_" if OUTPUT_CHAR_PROPS;
-
- # [0] is our object
- $event = $_[1]; # END or START
+ return if $IN_STYLESHEET or $IN_FONTTBL;
+ trace "@_" if OUTPUT_CHAR_PROPS;
+ $event = $_[1]; # END or START
# close or open all activated char prorperties
-
- push_output();
-
- while (my($char_prop, $value) = each %char_props) {
-
- next unless $value;
-
- trace "$event active char props: $char_prop" if OUTPUT_CHAR_PROPS;
-
- if (defined (my $action = $do_on_event{$char_prop})) {
-
- ($style, $event) = ($char_prop, $event);
-
- &$action;
-
- }
-
- $current_char_props{$char_prop} = $value;
-
- }
-
- $char_prop_change = 0;
-
- pop_output();
-
+ push_output();
+ while (my($char_prop, $value) = each %char_props) {
+ next unless $value;
+ trace "$event active char props: $char_prop" if OUTPUT_CHAR_PROPS;
+ if (defined (my $action = $do_on_event{$char_prop})) {
+ ($style, $event) = ($char_prop, $event);
+ &$action;
+ }
+ $current_char_props{$char_prop} = $value;
+ }
+ $char_prop_change = 0;
+ pop_output();
}
-
-
use constant PROCESS_CHAR_PROPS => 0;
-
-# Only run outside of stylesheets and fonttables,
-# and only when the $char_prop_change flag is
-# set.
-
sub process_char_props {
-
- return if $IN_STYLESHEET or $IN_FONTTBL;
-
- return unless $char_prop_change;
-
- # Add a new output block
-
- push_output();
-
- # Go through char_props (is the what we were, or what we're going to?!)
- while (my($char_prop, $value) = each %char_props) {
-
- # Get the current character property
- my $prop = $current_char_props{$char_prop};
-
- # Set it to an explicit 0 if not set
- $prop = defined $prop ? $prop : 0;
-
- trace "$char_prop $value" if PROCESS_CHAR_PROPS;
-
- # If the values in %char_props and $current_char_props don't match..
- if ($prop != $value) {
-
- # See if we have an event...
- if (defined (my $action = $do_on_event{$char_prop})) {
-
- # Set event to start or end depending on if
- # the $value is a literal 1.
- $event = $value == 1 ? 'start' : 'end';
-
- ($style, $event) = ($char_prop, $event);
-
- # Fire the event
- &$action;
-
- }
-
- # Set the $current_char_props to equal what was in %char_props
- $current_char_props{$char_prop} = $value;
-
- }
-
- trace "$char_prop - $prop - $value" if PROCESS_CHAR_PROPS;
-
- }
-
- # Reset the flag
- $char_prop_change = 0;
-
- # Return whatever was on the stack
- pop_output();
-
+ return if $IN_STYLESHEET or $IN_FONTTBL;
+ return unless $char_prop_change;
+ push_output();
+ while (my($char_prop, $value) = each %char_props) {
+ my $prop = $current_char_props{$char_prop};
+ $prop = defined $prop ? $prop : 0;
+ trace "$char_prop $value" if PROCESS_CHAR_PROPS;
+ if ($prop != $value) {
+ if (defined (my $action = $do_on_event{$char_prop})) {
+ $event = $value == 1 ? 'start' : 'end';
+ ($style, $event) = ($char_prop, $event);
+ &$action;
+ }
+ $current_char_props{$char_prop} = $value;
+ }
+ trace "$char_prop - $prop - $value" if PROCESS_CHAR_PROPS;
+ }
+ $char_prop_change = 0;
+ pop_output();
}
-
use constant DO_ON_CHAR_PROP => 0;
-
-# Again, not called in a font or stylesheet, for obvious reasons.
-# Set the char_prop_change flag. If the argument is '0', we set
-# that character property to that - if the event is start, we set
-# it to one, otherwise we throw a warning.
-
sub do_on_char_prop { # associated callback
-
- return if $IN_STYLESHEET or $IN_FONTTBL;
-
- my($control, $arg, $cevent) = ($_[CONTROL], $_[ARG], $_[EVENT]);
-
- trace "my(\$control, \$arg, \$cevent) = ($_[CONTROL], $_[ARG], $_[EVENT]);" if DO_ON_CHAR_PROP;
-
- $char_prop_change = 1;
-
- if (defined($_[ARG]) and $_[ARG] eq "0") { # \b0
-
- $char_props{$_[CONTROL]} = 0;
-
- } elsif ($_[EVENT] eq 'start') { # eg. \b or \b1
-
- $char_props{$_[CONTROL]} = 1;
-
- } else { # 'end'
-
- warn "statement not reachable";
- $char_props{$_[CONTROL]} = 0;
-
- }
-
+ return if $IN_STYLESHEET or $IN_FONTTBL;
+ my($control, $arg, $cevent) = ($_[CONTROL], $_[ARG], $_[EVENT]);
+ trace "my(\$control, \$arg, \$cevent) = ($_[CONTROL], $_[ARG], $_[EVENT]);" if DO_ON_CHAR_PROP;
+ $char_prop_change = 1;
+ if (defined($_[ARG]) and $_[ARG] eq "0") { # \b0
+ $char_props{$_[CONTROL]} = 0;
+ } elsif ($_[EVENT] eq 'start') { # eg. \b or \b1
+ $char_props{$_[CONTROL]} = 1;
+ } else { # 'end'
+ warn "statement not reachable";
+ $char_props{$_[CONTROL]} = 0;
+ }
}
-
-
-
-
-# LOOK MA! THIS BE IMPORTANT
__PACKAGE__->reset_char_props();
__PACKAGE__->install_callback('char_props', 'do_on_char_prop');
-
-
-
###########################################################################
# not more used!!!
-#use constant DO_ON_TOGGLE => 0;
-#sub do_on_toggle { # associated callback
-##
-#
- # return if $IN_STYLESHEET or $IN_FONTTBL;
- # my($control, $arg, $cevent) = ($_[CONTROL], $_[ARG], $_[EVENT]);
- # trace "my(\$control, \$arg, \$cevent) = ($_[CONTROL], $_[ARG], $_[EVENT]);" if DO_ON_TOGGLE;
-#
- # if ($_[ARG] eq "0") { # \b0, register an START event for this control
- # $control[TOP]->{"$_[CONTROL]1"} = 1; # register a start event for this properties
- # $cevent = 'end';
- # } elsif ($_[EVENT] eq 'start') { # \b or \b1
- # $control[TOP]->{"$_[CONTROL]$_[ARG]"} = 1;
- # } else { # $_[EVENT] eq 'end'
- # if ($_[ARG] eq "1") {
- # $cevent = 'start';
- # } else {
- # }
- # }
- # trace "(\$style, \$event, \$text) = ($control, $cevent, '')" if DO_ON_TOGGLE;
- # if (defined (my $action = $do_on_event{$control})) {
- # ($style, $event, $text) = ($control, $cevent, '');
- # &$action;
- # }
-#}
-
-
+use constant DO_ON_TOGGLE => 0;
+sub do_on_toggle { # associated callback
+ return if $IN_STYLESHEET or $IN_FONTTBL;
+ my($control, $arg, $cevent) = ($_[CONTROL], $_[ARG], $_[EVENT]);
+ trace "my(\$control, \$arg, \$cevent) = ($_[CONTROL], $_[ARG], $_[EVENT]);" if DO_ON_TOGGLE;
+
+ if ($_[ARG] eq "0") { # \b0, register an START event for this control
+ $control[TOP]->{"$_[CONTROL]1"} = 1; # register a start event for this properties
+ $cevent = 'end';
+ } elsif ($_[EVENT] eq 'start') { # \b or \b1
+ $control[TOP]->{"$_[CONTROL]$_[ARG]"} = 1;
+ } else { # $_[EVENT] eq 'end'
+ if ($_[ARG] eq "1") {
+ $cevent = 'start';
+ } else {
+ }
+ }
+ trace "(\$style, \$event, \$text) = ($control, $cevent, '')" if DO_ON_TOGGLE;
+ if (defined (my $action = $do_on_event{$control})) {
+ ($style, $event, $text) = ($control, $cevent, '');
+ &$action;
+ }
+}
###########################################################################
# FLAGS
use constant DO_ON_FLAG => 0;
-
-# Simply sets that pargraph properties of said flag to 1
-
sub do_on_flag {
-
#my($control, $arg, $cevent) = ($_[CONTROL], $_[ARG], $_[EVENT]);
- die if $_[ARG]; # no argument by definition
- trace "$_[CONTROL]" if DO_ON_FLAG;
- $par_props{$_[CONTROL]} = 1;
-
+ die if $_[ARG]; # no argument by definition
+ trace "$_[CONTROL]" if DO_ON_FLAG;
+ $par_props{$_[CONTROL]} = 1;
}
use vars qw/%charset/;
my $bullet_item = 'b7'; # will be redefined in a next release!!!
-
# Try to find a "RTF/<application>/char_map" file
# possible values for the control word are: ansi, mac, pc, pca
sub define_charset {
-
- my $charset = $_[CONTROL];
- eval {
- no strict 'refs';
- *charset = \%{"$charset"};
- };
-
- warn $@ if $@;
-
- my @charset_data = $_[SELF]->charmap_reader('char_map');
-
- my ($name, $char, $hexa);
- my %char = map{
-
- s/^\s+//;
- next unless /\S/;
- ($name, $char) = split /\s+/;
-
- if (!defined($hexa = $charset{$name})) {
-
- 'undef' => undef;
-
- } else {
-
- $hexa => $char;
-
- }
-
- } (@charset_data);
-
- %charset = %char; # for a direct translation of hexadecimal values
- warn $@ if $@;
-
+ my $charset = $_[CONTROL];
+ eval {
+ no strict 'refs';
+ *charset = \%{"$charset"};
+ };
+ warn $@ if $@;
+
+ my $charset_file = $_[SELF]->application_dir() . "/char_map";
+ my $application = ref $_[SELF];
+ open CHAR_MAP, "$charset_file"
+ or die "unable to open the '$charset_file': $!";
+
+ my ($name, $char, $hexa);
+ my %char = map{
+ s/^\s+//;
+ next unless /\S/;
+ ($name, $char) = split /\s+/;
+ if (!defined($hexa = $charset{$name})) {
+ 'undef' => undef;
+ } else {
+ $hexa => $char;
+ }
+ } (<CHAR_MAP>);
+ %charset = %char; # for a direct translation of hexadecimal values
+ warn $@ if $@;
}
-
-
-
my %flag_ctrl =
(
'ql' => \&do_on_flag,
@@ -983,129 +456,498 @@ my %value_ctrl =
my %pn = (); # paragraph numbering
my $field_ref = ''; # identifier associated to a field
#trace "define callback for $_[CONTROL]";
-
-
-
-
-# BEGIN API REDEFINITION
-
-# Ok, so this is actually the place to start as far as concerns
-# working out how the hell^Wfuck this thing works. I'm moving
-# all the constants to the top, and adding API documentation
-# here so future readers will have less trouble.
-
-
-use constant GROUP_START_TRACE => 0;
-use constant GROUP_END_TRACE => 0;
-use constant TEXT_TRACE => 0;
-use constant PARSE_START_END => 0;
-
-# Called when we first start actually parsing the document
-sub parse_start {
-
- my $self = shift;
-
- # Place holders for non-printed data
-
- %info = ();
- %fonttbl = ();
- %colortbl = ();
- %stylesheet = ();
-
- # Add an initial element to our output stack
-
- push_output();
-
- # If there's an event defined for the start of a document,
- # execute it now...
-
- if (defined (my $action = $do_on_event{'document'})) {
-
- # $event tells our action handler what's happening...
-
- $event = 'start';
-
- # Actually execute said action
-
- &$action;
-
- }
-
- # Prints and clears the top element on the output stack
-
- flush_top_output();
-
- # Add another element to the output stack
-
- push_output();
-
-}
-
-# Called at the end of parsing
-sub parse_end {
-
- my $self = shift;
-
- # @output_stack+0 forces scalar context?
- trace "parseEnd \@output_stack: ", @output_stack+0 if STACK_TRACE;
-
- # Call the end of document even if it exists
- if (defined (my $action = $do_on_event{'document'})) {
-
- ($style, $event, $text) = ($cstylename, 'end', '');
- &$action;
-
- }
-
- # Print and clear the top element on the output stack
-
- flush_top_output(); # @output_stack == 2;
-
-}
+%do_on_control =
+ (
+ %do_on_control,
+ %flag_ctrl,
+ %value_ctrl,
+ %symbol_ctrl,
+ %destination_ctrl,
+ 'plain' => sub {
+ #unless (@control) { die "\@control stack is empty"; }
+ #output('plain');
+ reset_char_props();
+ },
+ 'rtf' => sub { # rtfN, N is version number
+ if ($_[EVENT] eq 'start') {
+ push_output('nul');
+ $control[TOP]->{"$_[CONTROL]$_[ARG]"} = 1;
+ } else {
+ pop_output();
+ }
+ },
+ 'info' => sub { # {\info {...}}
+ if ($_[EVENT] eq 'start') {
+ push_output('nul');
+ $control[TOP]->{"$_[CONTROL]$_[ARG]"} = 1;
+ } else {
+ pop_output();
+ }
+ },
+ # INFO GROUP
+ # Other informations:
+ # {\printim\yr1997\mo11\dy3\hr11\min5}
+ # {\version3}{\edmins1}{\nofpages3}{\nofwords1278}{\nofchars7287}
+ # {\*\company SONOVISION-ITEP}{\vern57443}
+ 'title' => \&do_on_info, # destination
+ 'author' => \&do_on_info, # destination
+ 'revtim' => \&do_on_info, # destination
+ 'creatim' => \&do_on_info, # destination, {\creatim\yr1996\mo9\dy18\hr9\min17}
+ 'yr' => sub { output "$_[ARG]-" }, # value
+ 'mo' => sub { output "$_[ARG]-" }, # value
+ 'dy' => sub { output "$_[ARG]-" }, # value
+ 'hr' => sub { output "$_[ARG]-" }, # value
+ 'min' => sub { output "$_[ARG]" }, # value
+ # binary data
+ 'bin' => sub { $_[SELF]->read_bin($_[ARG]) }, # value
+ # Color table - destination
+ 'colortbl' => \&discard_content,
+ # Font table - destination
+ 'fonttbl' => sub {
+ #trace "$#control $_[CONTROL] $_[ARG] $_[EVENT]";
+ if ($_[EVENT] eq 'start') {
+ $IN_FONTTBL = 1 ;
+ push_output('nul');
+ $control[TOP]->{"$_[CONTROL]$_[ARG]"} = 1;
+ } else {
+ $IN_FONTTBL = 0 ;
+ pop_output();
+ }
+ },
+ # file table - destination
+ 'filetbl' => sub {
+ #trace "$#control $_[CONTROL] $_[ARG] $_[EVENT]";
+ if ($_[EVENT] eq 'start') {
+ push_output('nul');
+ $control[TOP]->{"$_[CONTROL]$_[ARG]"} = 1;
+ } else {
+ pop_output();
+ }
+ },
+
+ 'f', sub {
+ #my($control, $arg, $cevent) = ($_[CONTROL], $_[ARG], $_[EVENT]);
+ # perhaps interesting to provide a contextual
+ # definition of this kind of control words
+ # eg. in fonttbl call 'fonttbl:f', outside call 'f'
+ use constant FONTTBL_TRACE => 0; # if you want to see the fonttbl of the document
+ if ($IN_FONTTBL) {
+ if ($_[EVENT] eq 'start') {
+ push_output();
+ $control[TOP]->{"$_[CONTROL]$_[ARG]"} = 1;
+ } else {
+ my $fontname = pop_output;
+ my $fontdef = "$_[CONTROL]$_[ARG]";
+ if ($fontname =~ s/\s*;$//) {
+ trace "$fontdef => $fontname" if FONTTBL_TRACE;
+ $fonttbl{$fontdef} = $fontname;
+ } else {
+ warn "can't analyze $fontname";
+ }
+ }
+ return;
+ } elsif ($IN_STYLESHEET) { # eg. \f1 => Normal;
+ return if $styledef; # if you have already encountered an \sn
+ $styledef = "$_[CONTROL]$_[ARG]";
+ if ($_[EVENT] eq 'start') {
+ #trace "start $_[CONTROL]$_[ARG]" if STYLESHEET;
+ push_output();
+ $control[TOP]->{"$_[CONTROL]$_[ARG]"} = 1;
+ } else {
+ my $stylename = pop_output;
+ #trace "end\n $_[CONTROL]" if STYLESHEET;
+ if ($stylename =~ s/\s*;$//) {
+ trace "$styledef => $stylename" if STYLESHEET_TRACE;
+ $stylesheet{$styledef} = $stylename;
+ } else {
+ warn "can't analyze '$stylename' ($styledef; event: $_[EVENT])";
+ }
+ }
+ $styledef = '';
+ return;
+ }
+ return if $styledef; # if you have already encountered an \sn
+ $styledef = "$_[CONTROL]$_[ARG]";
+ $stylename = $stylesheet{"$styledef"};
+ trace "$styledef => $stylename" if STYLESHEET_TRACE;
+ return unless $stylename;
+
+ if ($cstylename ne $stylename) { # notify a style changing
+ if (defined (my $action = $do_on_event{'style_change'})) {
+ ($style, $newstyle) = ($cstylename, $stylename);
+ &$action;
+ }
+ }
+ $cstylename = $stylename;
+ $par_props{'stylename'} = $cstylename; # the current style
+ },
+ #
+ # Style processing
+ #
+ 'stylesheet' => sub {
+ trace "stylesheet $#control $_[CONTROL] $_[ARG] $_[EVENT]" if STYLESHEET_TRACE;
+ if ($_[EVENT] eq 'start') {
+ $IN_STYLESHEET = 1 ;
+ push_output('nul');
+ $control[TOP]->{"$_[CONTROL]$_[ARG]"} = 1;
+ } else {
+ $IN_STYLESHEET = 0;
+ pop_output;
+ }
+ },
+ 's', sub {
+ my($control, $arg, $cevent) = ($_[CONTROL], $_[ARG], $_[EVENT]);
+ $styledef = "$_[CONTROL]$_[ARG]";
+
+ if ($IN_STYLESHEET) {
+ if ($_[EVENT] eq 'start') {
+ push_output();
+ $control[TOP]->{"$_[CONTROL]$_[ARG]"} = 1;
+ } else {
+ my $stylename = pop_output;
+ warn "empty stylename" and return if $stylename eq '';
+ if ($stylename =~ s/\s*;$//) {
+ trace "$styledef => $stylename|" if STYLESHEET_TRACE;
+ $stylesheet{$styledef} = $stylename;
+ $styledef = '';
+ } else {
+ warn "can't analyze style name: '$stylename'";
+ }
+ }
+ return;
+ }
+
+ $stylename = $stylesheet{"$styledef"};
+ if ($cstylename ne $stylename) {
+ if (defined (my $action = $do_on_event{'style_change'})) {
+ ($style, $newstyle) = ($cstylename, $stylename);
+ &$action;
+ }
+ }
+ $cstylename = $stylename;
+ $par_props{'stylename'} = $cstylename; # the current style
+ trace "$styledef => $stylename" if STYLESHEET_TRACE;
+ },
+ # a very minimal table processing
+ 'trowd' => sub { # row start
+ use constant TABLE_TRACE => 0;
+ #print STDERR "=>Beginning of ROW\n";
+ unless ($IN_TABLE) {
+ $IN_TABLE = 1;
+ if (defined (my $action = $do_on_event{'table'})) {
+ $event = 'start';
+ trace "table $event $text\n" if TABLE_TRACE;
+ &$action;
+ }
+
+ push_output(); # table content
+ push_output(); # row sequence
+ push_output(); # cell sequence
+ push_output(); # cell content
+ }
+ },
+ 'intbl' => sub {
+ $par_props{'intbl'} = 1;
+ unless ($IN_TABLE) {
+ warn "ouverture en catastrophe" if TABLE_TRACE;
+ $IN_TABLE = 1;
+ if (defined (my $action = $do_on_event{'table'})) {
+ $event = 'start';
+ trace "table $event $text\n" if TABLE_TRACE;
+ &$action;
+ }
+
+ push_output();
+ push_output();
+ push_output();
+ push_output();
+ }
+ },
+ 'row' => sub { # row end
+ $text = pop_output;
+ $text = pop_output . $text;
+ if (defined (my $action = $do_on_event{'cell'})) {
+ $event = 'end';
+ trace "row $event $text\n" if TABLE_TRACE;
+ &$action;
+ }
+ $text = pop_output;
+ if (defined (my $action = $do_on_event{'row'})) {
+ $event = 'end';
+ trace "row $event $text\n" if TABLE_TRACE;
+ &$action;
+ }
+ push_output();
+ push_output();
+ push_output();
+ },
+ 'cell' => sub { # end of cell
+ trace "process cell content: $text\n" if TABLE_TRACE;
+ $text = pop_output;
+ if (defined (my $action = $do_on_event{'par'})) {
+ ($style, $event,) = ('par', 'end',);
+ &$action;
+ } else {
+ warn "$text";;
+ }
+ $text = pop_output;
+ if (defined (my $action = $do_on_event{'cell'})) {
+ $event = 'end';
+ trace "cell $event $text\n" if TABLE_TRACE;
+ &$action;
+ }
+ # prepare next cell
+ push_output();
+ push_output();
+ trace "\@output_stack in table: ", @output_stack+0 if STACK_TRACE;
+ },
+ 'par' => sub { # END OF PARAGRAPH
+ #my($control, $arg, $cevent) = ($_[CONTROL], $_[ARG], $_[EVENT]);
+ trace "($_[CONTROL], $_[ARG], $_[EVENT])" if STYLE_TRACE;
+ if ($IN_TABLE and not $par_props{'intbl'}) { # End of Table
+ $IN_TABLE = 0;
+ my $next_text = pop_output; # next paragraph content
+
+ $text = pop_output;
+ $text = pop_output . "$text";
+ if (defined (my $action = $do_on_event{'cell'})) { # end of cell
+ $event = 'end';
+ trace "cell $event $text\n" if TABLE_TRACE;
+ &$action;
+ }
+ $text = pop_output;
+ if (defined (my $action = $do_on_event{'row'})) { # end of row
+ $event = 'end';
+ trace "row $event $text\n" if TABLE_TRACE;
+ &$action;
+ }
+ $text = pop_output;
+ if (defined (my $action = $do_on_event{'table'})) { # end of table
+ $event = 'end';
+ trace "table $event $text\n" if TABLE_TRACE;
+ &$action;
+ }
+ push_output();
+ trace "end of table ($next_text)\n" if TABLE_TRACE;
+ output($next_text);
+ } else {
+ #push_output();
+ }
+ # paragraph style
+ if (defined($cstylename) and $cstylename ne '') { # end of previous style
+ $style = $cstylename;
+ } else {
+ $cstylename = $style = 'par'; # no better solution
+ }
+ $par_props{'stylename'} = $cstylename; # the current style
+
+ if ($par_props{intbl}) { # paragraph in tbl
+ trace "process cell content: $text\n" if TABLE_TRACE;
+ if (defined (my $action = $do_on_event{$style})) {
+ ($style, $event, $text) = ($style, 'end', pop_output);
+ &$action;
+ } elsif (defined ($action = $do_on_event{'par'})) {
+ #($style, $event, $text) = ('par', 'end', pop_output);
+ ($style, $event, $text) = ($style, 'end', pop_output);
+ &$action;
+ } else {
+ warn;
+ }
+ push_output();
+ #} elsif (defined (my $action = $do_on_event{'par_styles'})) {
+ } elsif (defined (my $action = $do_on_event{$style})) {
+ ($style, $event, $text) = ($style, 'end', pop_output);
+ &$action;
+ flush_top_output();
+ push_output();
+ } elsif (defined ($action = $do_on_event{'par'})) {
+ #($style, $event, $text) = ('par', 'end', pop_output);
+ ($style, $event, $text) = ($style, 'end', pop_output);
+ &$action;
+ flush_top_output();
+ push_output();
+ } else {
+ trace "no definition for '$style' in %do_on_event\n" if STYLE_TRACE;
+ flush_top_output();
+ push_output();
+ }
+ # redefine this!!!
+ $cli = $par_props{'li'};
+ $styledef = '';
+ $par_props{'bullet'} = $par_props{'number'} = $par_props{'tab'} = 0; #
+ },
+ # Resets to default paragraph properties
+ # Stop inheritence of paragraph properties
+ 'pard' => sub {
+ # !!!-> reset_par_props()
+ foreach (qw(qj qc ql qr intbl li)) {
+ $par_props{$_} = 0;
+ }
+ foreach (qw(list_item)) {
+ $par_props{$_} = '';
+ }
+ },
+ # ####################
+ # Fields and Bookmarks
+# 'field' => sub { # for a future version
+# use constant FIELD_TRACE => 0;
+# if ($_[EVENT] eq 'start') {
+# push_output();
+# $control[TOP]->{"$_[CONTROL]$_[ARG]"} = 1;
+# $field_ref = '';
+# } else {
+# #trace "$_[CONTROL] content: ", pop_output();
+# if (defined (my $action = $do_on_event{'field'})) {
+# ($style, $event, $text) = ($style, 'end', pop_output);
+# &$action($field_ref);
+# }
+# }
+# },
+
+ # don't uncomment!!!
+# 'fldrslt' => sub {
+# return;
+# if ($_[EVENT] eq 'start') {
+# push_output();
+# $control[TOP]->{"$_[CONTROL]$_[ARG]"} = 1;
+# } else {
+# #trace "$_[CONTROL] content: ", pop_output();
+# pop_output();
+# }
+# },
+ # uncomment!!!
+ # eg: {\*\fldinst {\i0 REF version \\* MERGEFORMAT }}
+# '*fldinst' => sub { # Destination
+# my $string = $_[EVENT];
+# trace "$_[CONTROL] content: $string" if FIELD_TRACE;
+# $string =~ /\b(REF|PAGEREF)\s+(_\w\w\w\d+)/i;
+# $field_ref = $2;
+# # PerlBug???; $_[CONTROL] == $1 - very strange
+# trace "$_[CONTROL] content: $string -> $2" if FIELD_TRACE;
+# trace "$_[1] content: $string -> $2" if FIELD_TRACE;
+# if (defined (my $action = $do_on_event{'field'})) {
+# ($style, $event, $text) = ($style, 'start', '');
+# &$action($field_ref);
+# }
+# },
+# # Bookmarks
+# '*bkmkstart' => sub { # destination
+# my $string = $_[EVENT];
+# if (defined (my $action = $do_on_event{'bookmark'})) {
+# $string =~ /(_\w\w\w\d+)/; # !!!
+# trace "$_[CONTROL] content: $string -> $1" if TRACE;
+# ($style, $event, $text) = ($style, 'start', $1);
+# &$action;
+# }
+# },
+# '*bkmkend' => sub { # destination
+# my $string = $_[EVENT];
+# if (defined (my $action = $do_on_event{'bookmark'})) {
+# $string =~ /(_\w\w\w\d+)/; # !!!
+# ($style, $event, $text) = ($style, 'end', $1);
+# &$action;
+# }
+# },
+ # ###########################
+ 'pn' => sub { # Turn on PARAGRAPH NUMBERING
+ #trace "($_[CONTROL], $_[ARG], $_[EVENT])" if TRACE;
+ if ($_[EVENT] eq 'start') {
+ %pn = ();
+ $control[TOP]->{"$_[CONTROL]$_[ARG]"} = 1;
+ } else {
+ # I don't like this!!! redesign the parser???
+ trace("Level: $pn{level} - Type: $pn{type} - Bullet: $pn{bullet}") if LIST_TRACE;
+ $par_props{list_item} = \%pn;
+ }
+ },
+ 'pnlvl' => sub { # Paragraph level $_[ARG] is a level from 1 to 9
+ $pn{level} = $_[ARG];
+ },
+ 'pnlvlbody' => sub { # Paragraph level 10
+ $pn{level} = 10;
+ },
+ 'pnlvlblt' => sub { # Paragraph level 11, processs the 'pntxtb' group
+ $pn{level} = 11; # bullet
+ },
+ 'pntxtb' => sub {
+ if ($_[EVENT] eq 'start') {
+ push_output();
+ $control[TOP]->{"$_[CONTROL]$_[ARG]"} = 1;
+ } else {
+ $pn{'bullet'} = pop_output();
+ }
+ },
+ 'pntxta' => sub {
+ if ($_[EVENT] eq 'start') {
+ push_output();
+ $control[TOP]->{"$_[CONTROL]$_[ARG]"} = 1;
+ } else {
+ pop_output();
+ }
+ },
+ # Numbering Types
+ 'pncard' => sub { # Cardinal numbering: One, Two, Three
+ $pn{type} = $_[CONTROL];
+ },
+ 'pndec' => sub { # Decimal numbering: 1, 2, 3
+ $pn{type} = $_[CONTROL];
+ },
+ 'pnucltr' => sub { # Uppercase alphabetic numbering
+ $pn{type} = $_[CONTROL];
+ },
+ 'pnlcltr' => sub { # Lowercase alphabetic numbering
+ $pn{type} = $_[CONTROL];
+ },
+ 'pnucrm' => sub { # Uppercase roman numbering
+ $pn{type} = $_[CONTROL];
+ },
+ 'pnlcrm' => sub { # Lowercase roman numbering
+ $pn{type} = $_[CONTROL];
+ },
+ 'pntext' => sub { # ignore text content
+ if ($_[EVENT] eq 'start') {
+ push_output();
+ $control[TOP]->{"$_[CONTROL]$_[ARG]"} = 1;
+ } else {
+ pop_output();
+ }
+ },
+ #'tab' => sub { $par_props{'tab'} = 1 }, # special char
+
+ 'li' => sub { # line indent - value
+ use constant LI_TRACE => 0;
+ my $indent = $_[ARG];
+ $indent =~ s/^-//;
+ trace "line indent: $_[ARG] -> $indent" if LI_TRACE;
+ $par_props{'li'} = $indent;
+ },
+ );
+###########################################################################
+ # Parser callback definitions
+use constant GROUP_START_TRACE => 0;
sub group_start { # on {
-
my $self = shift;
-
trace "" if GROUP_START_TRACE;
-
- # Take a copy of the parent block's paragraph properties
- push @par_props_stack, { %par_props };
-
- # Take a copy of the parent block's character properties
- push @char_props_stack, { %char_props };
-
- # Aha! More accurately, controls we've opened, so we can close them in group_end()
- push @control, {}; # hash of controls
-
-
+ push @par_props_stack, { %par_props };
+ push @char_props_stack, { %char_props };
+ push @control, {}; # hash of controls
}
-
+use constant GROUP_END_TRACE => 0;
sub group_end { # on }
# par properties
-
- # Retrieve parent block's paragraph properties
- %par_props = %{ pop @par_props_stack };
-
- # And use it to set the current stylename
- $cstylename = $par_props{'stylename'}; # the current style
+ %par_props = %{ pop @par_props_stack };
+ $cstylename = $par_props{'stylename'}; # the current style
# Char properties
# process control like \b0
-
- # Grab the character properties of our parent
- %char_props = %{ pop @char_props_stack };
-
- # Fire off the 'char props have changed' event
- $char_prop_change = 1;
- output process_char_props();
-
- # Always a /really/ /really/ bad sign :-(
- no strict qw/refs/;
+ %char_props = %{ pop @char_props_stack };
+ $char_prop_change = 1;
+ output process_char_props();
- # Send an end thingy to each control we're closing
+ no strict qw/refs/;
foreach my $control (keys %{pop @control}) { # End Events!
$control =~ /([^\d]+)(\d+)?/; # eg: b0, b1
trace "($#control): $1-$2" if GROUP_END_TRACE;
@@ -1113,19 +955,12 @@ sub group_end { # on }
&{"RTF::Action::$1"}($_[SELF], $1, $2, 'end');
}
}
-
-# Just dump text
+use constant TEXT_TRACE => 0;
sub text {
-
trace "$_[1]" if TEXT_TRACE;
output($_[1]);
-
}
-
-# If we have an equiv, print it, otherwise, print the original
-
-sub char {
-
+sub char {
if (defined(my $char = $charset{$_[1]})) {
#print STDERR "$_[1] => $char\n";
output "$char";
@@ -1133,873 +968,42 @@ sub char {
output "$_[1]";
}
}
-
sub symbol { # symbols: \ - _ ~ : | { } * \'
-
if (defined(my $sym = $symbol{$_[1]})) {
output "$sym";
} else {
output "$_[1]"; # as it
}
}
+use constant PARSE_START_END => 0;
+sub parse_start {
+ my $self = shift;
+ # some initializations
+ %info = ();
+ %fonttbl = ();
+ %colortbl = ();
+ %stylesheet = ();
-
-
-
-sub debug {
-
- my $function = shift;
-
- print STDERR "[RTF::Control::$function]" . (join '|', @_ ) , "\n";
-
+ push_output();
+ if (defined (my $action = $do_on_event{'document'})) {
+ $event = 'start';
+ &$action;
+ }
+ flush_top_output();
+ push_output();
+}
+sub parse_end {
+ my $self = shift;
+ my $action = '';
+ trace "parseEnd \@output_stack: ", @output_stack+0 if STACK_TRACE;
+
+ if (defined ($action = $do_on_event{'document'})) {
+ ($style, $event, $text) = ($cstylename, 'end', '');
+ &$action;
+ }
+ flush_top_output(); # @output_stack == 2;
}
-
-
-
-
-
-
-
-
-%do_on_control =
- (
- %do_on_control,
- %flag_ctrl,
- %value_ctrl,
- %symbol_ctrl,
- %destination_ctrl,
-
- # Resets character formatting in scope... Note how we don't
- # check for start and end events? My guess is this is because
- # the original author is a BAD BAD MAN, and because running
- # reset_char_props() when \plain goes out of scope doesn't
- # cause any side effects. Something to experiment with when
- # I have a regression test suite...
- ###########################################################
-
- 'plain' => sub {
-
- reset_char_props();
-
- },
-
- ###########################################################
-
- # The only thing puzzling me here is why we're doing a null
- # call to push_output. This (and other subroutines below)
- # are ripe for a bit of refactoring - they all do the same
- # thing!
- ###########################################################
-
- 'rtf' => sub { # rtfN, N is version number
-
- if ($_[EVENT] eq 'start') {
-
- push_output('nul');
- $control[TOP]->{"$_[CONTROL]$_[ARG]"} = 1;
-
- } else {
-
- # There may actually be content at this point!
- flush_top_output();
-
- # The buffer should be empty at this point.
- # Make it so :-) This should use an RTF::Tokenizer
- # method before I release this as production.
- # TODO...
-
- $_[SELF]->{_TOKENIZER}->{_BUFFER} = '';
- $_[SELF]->{_TOKENIZER}->{_FILEHANDLE} = '';
-
- }
-
- },
-
- ###########################################################
-
-
-
- # Info group. The &do_on_info sub is trivial, and merely puts
- # the rest of the text in a destination into %info, with the
- # key being the field (like 'title'). creatim is kinda clever
- # then in that it turns the rest of those fields into one
- # long text string.
- #
- # Other information we could grab:
- # {\printim\yr1997\mo11\dy3\hr11\min5}
- # {\version3}{\edmins1}{\nofpages3}{\nofwords1278}{\nofchars7287}
- # {\*\company SONOVISION-ITEP}{\vern57443}
- ###########################################################
-
- 'info' => sub { # {\info {...}}
-
- if ($_[EVENT] eq 'start') {
-
- # Stops us collecting any text we don't want
-
- push_output('nul');
- $control[TOP]->{"$_[CONTROL]$_[ARG]"} = 1;
-
- } else {
-
- pop_output();
-
- }
-
- },
-
- 'title' => \&do_on_info, # destination
- 'author' => \&do_on_info, # destination
- 'revtim' => \&do_on_info, # destination
- 'creatim' => \&do_on_info, # destination, {\creatim\yr1996\mo9\dy18\hr9\min17}
- 'yr' => sub { output "$_[ARG]-" }, # value
- 'mo' => sub { output "$_[ARG]-" }, # value
- 'dy' => sub { output "$_[ARG]-" }, # value
- 'hr' => sub { output "$_[ARG]-" }, # value
- 'min' => sub { output "$_[ARG]" }, # value
-
- ###########################################################
-
- # Read binary data - only, this function has been removed
- # from RTF::Parser.pm. Ooops. Add it back in and PUT IN
- # A TEST.
- ###########################################################
-
- 'bin' => sub { $_[SELF]->read_bin($_[ARG]) }, # value
-
- # \ulnone should be treated as if it were \ul0...
- ###########################################################
-
- 'ulnone' => sub {
-
- $_[SELF]->do_on_char_prop( 'ul', '0', 'start' );
-
- },
-
- # Clearly we're not interested in the colour table....
- ###########################################################
-
- 'colortbl' => \&discard_content,
-
- # The start of the font-table. There's a global(ish) flag
- # $IN_FONTTBL that influences how other parts of the module
- # work. The main thing we do is turn this flag on when we
- # get to this point. The 'push_output('nul')' also turns
- # off any output while we're in the font table.
- ###########################################################
-
- 'fonttbl' => sub {
-
- if ($_[EVENT] eq 'start') {
-
- # Set the global flag
- $IN_FONTTBL = 1;
-
- # Turn off output
- push_output('nul');
-
- # Remember that this event has fired, and close it
- # when we go out of scope.
- $control[TOP]->{"$_[CONTROL]$_[ARG]"} = 1;
-
- } else {
-
- $IN_FONTTBL = 0 ;
- pop_output();
-
- }
-
- },
-
- ###########################################################
-
- # We seem to not want anything to do with the filetable
- # either - I guess the reason we define a control for it
- # (because otherwise it'd get skipped as an unknow destination
- # I think) is so that subclassers can handle it if they
- # want.
- ###########################################################
-
- 'filetbl' => sub {
-
- #trace "$#control $_[CONTROL] $_[ARG] $_[EVENT]";
- if ($_[EVENT] eq 'start') {
-
- push_output('nul');
- $control[TOP]->{"$_[CONTROL]$_[ARG]"} = 1;
-
- } else {
-
- pop_output();
-
- }
-
- },
-
- ###########################################################
-
-
- # A font control - highly context-dependant control word ... Can be used
- # to introduce a font definition when we're in the font-table, to specify
- # which font a style uses in the style-table, or to change the font we're
- # currently using when used as a paragraph/character property.
- ###########################################################
-
- 'f' => sub {
-
- use constant FONTTBL_TRACE => 0; # if you want to see the fonttbl of the document
-
- # We're in the middle of the font-table, so this is a font definition.
- # We're only really interested in what happens when we pass *out* of
- # scope, because at that point we'll have grabbed the font-name. I'd
- # like to add panose support at some point.
-
- if ($IN_FONTTBL) {
-
- if ($_[EVENT] eq 'start') {
-
- # Add a new element to the output stack that we can
- # snarf back in a minute when we hit the group close
-
- push_output();
-
- # Say we're open
-
- $control[TOP]->{"$_[CONTROL]$_[ARG]"} = 1;
-
- } else {
-
- # Grab the element from the output stack, which'll be
- # our fontname
-
- my $fontname = pop_output;
-
- # This will be something like 'f1'
-
- my $fontdef = "$_[CONTROL]$_[ARG]";
-
- # Remove the trailing semi-colon and any space
-
- if ($fontname =~ s/\s*;$//) {
-
- trace "$fontdef => $fontname" if FONTTBL_TRACE;
-
- # Set the fontdef and the fontname in the font-table hash
-
- $fonttbl{$fontdef} = $fontname;
-
- } else {
-
- warn "can't analyze $fontname";
-
- }
-
- }
-
- return;
-
- # We're in the style sheet. This part doesn't make much sense
- # just yet, will come back to it. Looks like \f is being used
- # to recognise when a style definition is finished?! Bizarre.
-
- } elsif ($IN_STYLESHEET) { # eg. \f1 => Normal;
-
- return if $styledef; # if you have already encountered an \sn
- $styledef = "$_[CONTROL]$_[ARG]";
-
- if ($_[EVENT] eq 'start') {
-
- #trace "start $_[CONTROL]$_[ARG]" if STYLESHEET;
- push_output();
- $control[TOP]->{"$_[CONTROL]$_[ARG]"} = 1;
-
- } else {
-
- my $stylename = pop_output;
- #trace "end\n $_[CONTROL]" if STYLESHEET;
-
- if ($stylename =~ s/\s*;$//) {
-
- trace "$styledef => $stylename" if STYLESHEET_TRACE;
- $stylesheet{$styledef} = $stylename;
-
- } else {
-
- warn "can't analyze '$stylename' ($styledef; event: $_[EVENT])";
-
- }
-
- }
-
- $styledef = '';
- return;
-
- }
-
- return if $styledef; # if you have already encountered an \sn
-
- # This doesn't make a great deal of sense
- $styledef = "$_[CONTROL]$_[ARG]";
- $stylename = $stylesheet{"$styledef"};
- trace "$styledef => $stylename" if STYLESHEET_TRACE;
-
- return unless $stylename;
-
- if ($cstylename ne $stylename) { # notify a style changing
-
- if (defined (my $action = $do_on_event{'style_change'})) {
-
- ($style, $newstyle) = ($cstylename, $stylename);
- &$action;
-
- }
-
- }
-
- $cstylename = $stylename;
- $par_props{'stylename'} = $cstylename; # the current style
-
- },
-
- ###########################################################
-
- # Stylesheet - like font-table above, we set the flag, and
- # make sure we don't grab any unwanted text...
-
- 'stylesheet' => sub {
-
- trace "stylesheet $#control $_[CONTROL] $_[ARG] $_[EVENT]" if STYLESHEET_TRACE;
-
- if ($_[EVENT] eq 'start') {
-
- $IN_STYLESHEET = 1 ;
- push_output('nul');
- $control[TOP]->{"$_[CONTROL]$_[ARG]"} = 1;
-
- } else {
-
- $IN_STYLESHEET = 0;
- pop_output;
-
- }
- },
-
- ###########################################################
-
- # Stylesheet definition
- ###########################################################
-
- 's' => sub {
-
- my($control, $arg, $cevent) = ($_[CONTROL], $_[ARG], $_[EVENT]);
-
- $styledef = "$_[CONTROL]$_[ARG]";
-
- # This looks pretty much identical to \f - only, looking at it,
- # it probably doesn't work. My head hurts.
-
- if ($IN_STYLESHEET) {
-
- if ($_[EVENT] eq 'start') {
-
- push_output();
- $control[TOP]->{"$_[CONTROL]$_[ARG]"} = 1;
-
- } else {
-
- my $stylename = pop_output;
- warn "empty stylename" and return if $stylename eq '';
-
- if ($stylename =~ s/\s*;$//) {
-
- trace "$styledef => $stylename|" if STYLESHEET_TRACE;
- $stylesheet{$styledef} = $stylename;
- $styledef = '';
-
- } else {
-
- warn "can't analyze style name: '$stylename'";
-
- }
-
- }
-
- return;
-
- }
-
- $stylename = $stylesheet{"$styledef"};
-
- if ($cstylename ne $stylename) {
-
- if (defined (my $action = $do_on_event{'style_change'})) {
-
- ($style, $newstyle) = ($cstylename, $stylename);
-
- &$action;
-
- }
-
- }
-
- $cstylename = $stylename;
- $par_props{'stylename'} = $cstylename; # the current style
- trace "$styledef => $stylename" if STYLESHEET_TRACE;
-
- },
-
- ###########################################################
-
-
- # Tells us we're starting a row...
- ###########################################################
-
- 'trowd' => sub {
-
- use constant TABLE_TRACE => 0;
-
- #print STDERR "=>Beginning of ROW\n";
-
- # If we're not in a table...
-
- unless ($IN_TABLE) {
-
- # Set the flag to say we now are
-
- $IN_TABLE = 1;
-
- # Fire off a table even if we have one
-
- if (defined (my $action = $do_on_event{'table'})) {
-
- $event = 'start';
- trace "table $event $text\n" if TABLE_TRACE;
- &$action;
-
- }
-
- # Add lots of output holders for various things...
-
- push_output(); # table content
- push_output(); # row sequence
- push_output(); # cell sequence
- push_output(); # cell content
-
- }
-
- },
-
- # Perhaps the control that opens a table? Never the less,
- # an exact clone of the function above!
- ###########################################################
-
-
- 'intbl' => sub {
-
- $par_props{'intbl'} = 1;
-
- unless ($IN_TABLE) {
-
- warn "ouverture en catastrophe" if TABLE_TRACE;
- $IN_TABLE = 1;
-
- if (defined (my $action = $do_on_event{'table'})) {
-
- $event = 'start';
- trace "table $event $text\n" if TABLE_TRACE;
- &$action;
-
- }
-
- push_output();
- push_output();
- push_output();
- push_output();
-
- }
-
- },
-
- # The end of a row
- ###########################################################
-
-
- 'row' => sub { # row end
-
- # Grab the cell and the 'cell sequence'
-
- $text = pop_output;
- $text = pop_output . $text;
-
- # Fire off the 'end cell' handler if we have one
-
- if (defined (my $action = $do_on_event{'cell'})) {
-
- $event = 'end';
- trace "row $event $text\n" if TABLE_TRACE;
- &$action;
-
- }
-
- # Grab any row text
-
- $text = pop_output;
-
- # Fire off the end-row event
-
- if (defined (my $action = $do_on_event{'row'})) {
-
- $event = 'end';
- trace "row $event $text\n" if TABLE_TRACE;
- &$action;
- }
-
- # Prep the next row
-
- push_output();
- push_output();
- push_output();
-
- },
-
- ###########################################################
-
- # End of a cell
- ###########################################################
-
-
- 'cell' => sub { # end of cell
-
- trace "process cell content: $text\n" if TABLE_TRACE;
- $text = pop_output;
-
- # Fire the paragraph handler
-
- if (defined (my $action = $do_on_event{'par'})) {
-
- ($style, $event,) = ('par', 'end',);
- &$action;
-
- } else {
-
- warn "$text";;
-
- }
-
- $text = pop_output;
-
- # Fire the end-cell handler
-
- if (defined (my $action = $do_on_event{'cell'})) {
-
- $event = 'end';
- trace "cell $event $text\n" if TABLE_TRACE;
- &$action;
-
- }
- # prepare next cell
- push_output();
- push_output();
- trace "\@output_stack in table: ", @output_stack+0 if STACK_TRACE;
-
- },
-
- ###########################################################
-
-
- # And thus the paragraph ends
- ###########################################################
-
- 'par' => sub { # END OF PARAGRAPH
-
- trace "($_[CONTROL], $_[ARG], $_[EVENT])" if STYLE_TRACE;
-
- # Close a table. Add to $text, and call even handlers
- # for cell, row, and table, in order.
-
- if ($IN_TABLE and not $par_props{'intbl'}) { # End of Table
-
- $IN_TABLE = 0;
- my $next_text = pop_output; # next paragraph content
-
- $text = pop_output;
- $text = pop_output . "$text";
-
- if (defined (my $action = $do_on_event{'cell'})) { # end of cell
-
- $event = 'end';
- trace "cell $event $text\n" if TABLE_TRACE;
- &$action;
-
- }
-
- $text = pop_output;
-
- if (defined (my $action = $do_on_event{'row'})) { # end of row
-
- $event = 'end';
- trace "row $event $text\n" if TABLE_TRACE;
- &$action;
-
- }
-
- $text = pop_output;
-
- if (defined (my $action = $do_on_event{'table'})) { # end of table
-
- $event = 'end';
- trace "table $event $text\n" if TABLE_TRACE;
- &$action;
-
- }
-
- push_output();
- trace "end of table ($next_text)\n" if TABLE_TRACE;
- output($next_text);
-
- } else {
-
- #push_output();
-
- }
-
- # paragraph style
- if (defined($cstylename) and $cstylename ne '') { # end of previous style
-
- $style = $cstylename;
-
- } else {
-
- $cstylename = $style = 'par'; # no better solution
-
- }
-
- $par_props{'stylename'} = $cstylename; # the current style
-
- if ($par_props{intbl}) { # paragraph in tbl
-
- trace "process cell content: $text\n" if TABLE_TRACE;
-
- if (defined (my $action = $do_on_event{$style})) {
-
- ($style, $event, $text) = ($style, 'end', pop_output);
- &$action;
-
- } elsif (defined ($action = $do_on_event{'par'})) {
-
- #($style, $event, $text) = ('par', 'end', pop_output);
- ($style, $event, $text) = ($style, 'end', pop_output);
- &$action;
-
- } else {
-
- warn;
-
- }
-
- push_output();
-
- #} elsif (defined (my $action = $do_on_event{'par_styles'})) {
- } elsif (defined (my $action = $do_on_event{$style})) {
-
- ($style, $event, $text) = ($style, 'end', pop_output);
- &$action;
- flush_top_output();
- push_output();
-
- } elsif (defined ($action = $do_on_event{'par'})) {
-
- #($style, $event, $text) = ('par', 'end', pop_output);
- ($style, $event, $text) = ($style, 'end', pop_output);
- &$action;
- flush_top_output();
- push_output();
-
- } else {
-
- trace "no definition for '$style' in %do_on_event\n" if STYLE_TRACE;
- flush_top_output();
- push_output();
-
- }
- # redefine this!!!
- $cli = $par_props{'li'};
- $styledef = '';
- $par_props{'bullet'} = $par_props{'number'} = $par_props{'tab'} = 0; #
-
- },
- # Resets to default paragraph properties
- # Stop inheritence of paragraph properties
-
-
- 'pard' => sub {
-
- # !!!-> reset_par_props()
- foreach (qw(qj qc ql qr intbl li)) {
-
- $par_props{$_} = 0;
-
- }
-
- foreach (qw(list_item)) {
-
- $par_props{$_} = '';
-
- }
-
- },
-
- # ###########################
-
- 'pn' => sub { # Turn on PARAGRAPH NUMBERING
-
- #trace "($_[CONTROL], $_[ARG], $_[EVENT])" if TRACE;
-
- if ($_[EVENT] eq 'start') {
-
- %pn = ();
- $control[TOP]->{"$_[CONTROL]$_[ARG]"} = 1;
-
- } else {
-
- # I don't like this!!! redesign the parser???
- trace("Level: $pn{level} - Type: $pn{type} - Bullet: $pn{bullet}") if LIST_TRACE;
- $par_props{list_item} = \%pn;
-
- }
-
- },
-
-
- 'pnlvl' => sub { # Paragraph level $_[ARG] is a level from 1 to 9
-
- $pn{level} = $_[ARG];
-
- },
-
- 'pnlvlbody' => sub { # Paragraph level 10
-
- $pn{level} = 10;
-
- },
-
- 'pnlvlblt' => sub { # Paragraph level 11, processs the 'pntxtb' group
-
- $pn{level} = 11; # bullet
-
- },
-
- 'pntxtb' => sub {
-
- if ($_[EVENT] eq 'start') {
-
- push_output();
- $control[TOP]->{"$_[CONTROL]$_[ARG]"} = 1;
-
- } else {
-
- $pn{'bullet'} = pop_output();
-
- }
-
- },
-
- 'pntxta' => sub {
-
- if ($_[EVENT] eq 'start') {
-
- push_output();
- $control[TOP]->{"$_[CONTROL]$_[ARG]"} = 1;
-
- } else {
-
- pop_output();
-
- }
-
- },
- # Numbering Types
- 'pncard' => sub { # Cardinal numbering: One, Two, Three
- $pn{type} = $_[CONTROL];
- },
-
- 'pndec' => sub { # Decimal numbering: 1, 2, 3
- $pn{type} = $_[CONTROL];
- },
-
- 'pnucltr' => sub { # Uppercase alphabetic numbering
- $pn{type} = $_[CONTROL];
- },
-
- 'pnlcltr' => sub { # Lowercase alphabetic numbering
- $pn{type} = $_[CONTROL];
- },
-
- 'pnucrm' => sub { # Uppercase roman numbering
- $pn{type} = $_[CONTROL];
- },
-
- 'pnlcrm' => sub { # Lowercase roman numbering
- $pn{type} = $_[CONTROL];
- },
-
-
- 'pntext' => sub { # ignore text content
-
- if ($_[EVENT] eq 'start') {
-
- push_output();
- $control[TOP]->{"$_[CONTROL]$_[ARG]"} = 1;
-
- } else {
-
- pop_output();
-
- }
-
- },
- #'tab' => sub { $par_props{'tab'} = 1 }, # special char
-
- 'li' => sub { # line indent - value
-
- use constant LI_TRACE => 0;
- my $indent = $_[ARG];
- $indent =~ s/^-//;
- trace "line indent: $_[ARG] -> $indent" if LI_TRACE;
- $par_props{'li'} = $indent;
-
- },
-);
-###########################################################################
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
use vars qw(%not_processed);
END {
if (@control) {
@@ -1,49 +0,0 @@
-package RTF::HTML::Converter::ansi;
-
-my @data = (<DATA>);
-chomp(@data);
-
-sub data {
-
- return @data;
-
-}
-
-1;
-
-__DATA__
-00 `
-01 ´
-02 ^
-03 ~
-04 ­
-05 ­
-06 °
-07 ¨
-08 ·
-20 --
-30 _
-82 ,
-83 f
-84 ,,
-85 ...
-86 +
-87 ++
-88 ^
-89 0/00
-8a S
-8b <
-8c OE
-91 `
-92 '
-93 ``
-94 ''
-95 ·
-96 -
-97 --
-98 ~
-99 [tm]
-9a s
-9b >
-9c oe
-9f Y
@@ -1,197 +0,0 @@
-package RTF::HTML::Converter::charmap;
-
-my @data = (<DATA>);
-chomp(@data);
-
-sub data {
-
- return @data;
-
-}
-
-1;
-
-__DATA__
-exclam !
-quotedbl "
-numbersign #
-dollar $
-percent %
-ampersand &
-quoteright '
-parenleft (
-parenright )
-asterisk *
-plus +
-comma ,
-hyphen -
-period .
-slash /
-zero 0
-one 1
-two 2
-three 3
-four 4
-five 5
-six 6
-seven 7
-eight 8
-nine 9
-colon :
-semicolon ;
-less <
-equal =
-greater >
-question ?
-at @
-bracketleft [
-backslash \
-bracketright ]
-asciicircum ^
-underscore _
-quoteleft `
-braceleft {
-bar |
-braceright }
-asciitilde ~
-OE OE
-acute '
-angleleft [
-angleright >
-approxequal ~
-arrowboth <->
-arrowdblboth <=>
-arrowdblleft <=
-arrowdblright =>
-arrowleft <-
-arrowright ->
-bullet *
-cent ¢
-circumflex ^
-copyright ©
-copyrightsans ©
-dagger +
-degree °
-delta d
-divide ÷
-dotlessi i
-ellipsis ...
-emdash --
-endash -
-fi fi
-fl fl
-fraction /
-grave `
-greaterequal >=
-guillemotleft «
-guillemotright »
-guilsinglleft <
-guilsinglright >
-lessequal <=
-logicalnot ¬
-mathasterisk *
-mathequal =
-mathminus -
-mathnumbersign #
-mathplus +
-mathtilde ~
-minus -
-mu µ
-multiply ×
-nobrkhyphen -
-nobrkspace  
-notequal !=
-oe oe
-onehalf ½
-onequarter ¼
-periodcentered .
-plusminus ±
-quotedblbase ,,
-quotedblleft "
-quotedblright "
-quotesinglbase ,
-registered ®
-registersans ®
-threequarters ¾
-tilde ~
-trademark [tm]
-AE Æ
-Aacute Á
-Acircumflex Â
-Agrave À
-Aring Å
-Atilde Ã
-Adieresis Ä
-Ccedilla Ç
-Eth Ð
-Eacute É
-Ecircumflex Ê
-Egrave È
-Edieresis Ë
-Iacute Í
-Icircumflex Î
-Igrave Ì
-Idieresis Ï
-Ntilde Ñ
-Oacute Ó
-Ocircumflex Ô
-Ograve Ò
-Oslash Ø
-Otilde Õ
-Odieresis Ö
-Thorn Þ
-Uacute Ú
-Ucircumflex Û
-Ugrave Ù
-Udieresis Ü
-Yacute Ý
-ae æ
-aacute á
-acircumflex â
-agrave à
-aring å
-atilde ã
-adieresis ä
-ccedilla ç
-eacute é
-ecircumflex ê
-egrave è
-eth ð
-edieresis ë
-iacute í
-icircumflex î
-igrave ì
-idieresis ï
-ntilde ñ
-oacute ó
-ocircumflex ô
-ograve ò
-oslash ø
-otilde õ
-odieresis ö
-germandbls ß
-thorn þ
-uacute ú
-ucircumflex û
-ugrave ù
-udieresis ü
-yacute ý
-ydieresis ÿ
-newline <br>
-ordfeminine ª
-ordmasculine º
-questiondown ¿
-exclamdown ¡
-section §
-onesuperior ¹
-twosuperior ²
-threesuperior ³
-sterling £
-currency ¤
-yen ¥
-brokenbar ¦
-dieresis ¨
-opthyphen ­
-macron ¯
-paragraph ¶
-cedilla ¸
@@ -3,9 +3,6 @@ use strict;
package RTF::HTML::Converter;
use RTF::Control;
-use RTF::HTML::Converter::ansi;
-use RTF::HTML::Converter::charmap;
-
@RTF::HTML::Converter::ISA = qw(RTF::Control);
use constant TRACE => 0;
@@ -14,87 +11,6 @@ use constant SHOW_STYLE_NOT_PROCESSED => 1;
use constant SHOW_STYLE => 0; # insert style name in the output
use constant SHOW_RTF_LINE_NUMBER => 0;
-use constant RTF_DEBUG => 0;
-
-
-
-=head1 NAME
-
-RTF::HTML::Converter - Perl extension for converting RTF into HTML
-
-=head1 DESCRIPTION
-
-Perl extension for converting RTF into HTML
-
-=head1 SYNOPSIS
-
- use strict;
- use RTF::HTML::Converter;
-
- my $object = RTF::HTML::Converter->new(
-
- output => \*STDOUT
-
- );
-
- $object->parse_stream( \*RTF_FILE );
-
-OR
-
- use strict;
- use RTF::HTML::Converter;
-
- my $object = RTF::HTML::Converter->new(
-
- output => \$string
-
- );
-
- $object->parse_string( $rtf_data );
-
-=head1 METHODS
-
-=head2 new()
-
-Constructor method. Currently takes one named parameter, C<output>,
-which can either be a reference to a filehandle, or a reference to
-a string. This is where our HTML will end up.
-
-=head2 parse_stream()
-
-Read RTF in from a filehandle, and start processing it. Pass me
-a reference to a filehandle.
-
-=head2 parse_string()
-
-Read RTF in from a string, and start processing it. Pass me a string.
-
-=head1 JUST SO YOU KNOW
-
-You can mix-and-match your output and input methods - nothing to stop
-you outputting to a string when you've read from a filehandle...
-
-=head1 AUTHOR
-
-Peter Sergeant C<rtf.parser@clueball.com>, originally by Philippe Verdret
-
-=head1 COPYRIGHT
-
-Copyright 2004 B<Pete Sergeant>.
-
-This program is free software; you can redistribute it and/or modify it under
-the same terms as Perl itself.
-
-=head1 CREDITS
-
-This work was carried out under a grant generously provided by The Perl Foundation -
-give them money!
-
-
-=cut
-
-
-
# Symbol exported by the RTF::Ouptut module:
# %info: informations of the {\info ...}
# %par_props: paragraph properties
@@ -119,8 +35,6 @@ my $CURRENT_LI = 0; # current list indent
my @LIST_STACK = (); # stack of opened lists
my %LI_LEVEL = (); # li -> list level
-my %charmap_defaults = map({ sprintf("%02x", $_) => "&#$_;" } (0..255));
-
my %PAR_ALIGN = qw(
qc CENTER
ql LEFT
@@ -378,9 +292,6 @@ use constant GEN_TAGS_WARNS => 1;
my @element_stack = ();
my %open_element = ();
sub gen_tags { # manage a minimal context for tag outputs
-
- debug( 'gen_tags', @_ ) if RTF_DEBUG > 5;
-
die "bad argument number" unless (@_ >= 2);
my ($eve, $tag, $att) = @_;
@@ -427,13 +338,15 @@ $do_on_control{'ansi'} = # callback redefinition
sub {
# RTF: \'<hex value>
# HTML: &#<dec value>;
-
- my @charmap_data = $_[SELF]->charmap_reader( $_[CONTROL] );
+ my $charset = $_[CONTROL];
+ my $charset_file = $_[SELF]->application_dir() . "/$charset";
+ open CHAR_MAP, "$charset_file"
+ or die "unable to open the '$charset_file': $!";
my %charset = ( # general rule
- %charmap_defaults,
+ map({ sprintf("%02x", $_) => "&#$_;" } (0..255)),
# and some specific defs
- map({ s/^\s+//; split /\s+/ } @charmap_data)
+ map({ s/^\s+//; split /\s+/ } (<CHAR_MAP>))
);
*char = sub {
my $char_props;
@@ -447,7 +360,6 @@ $do_on_control{'ansi'} = # callback redefinition
}
};
-
# symbol processing
# RTF: \~
# named chars
@@ -457,11 +369,7 @@ $symbol{'tab'} = ' '; #' ';
$symbol{'ldblquote'} = '«';
$symbol{'rdblquote'} = '»';
$symbol{'line'} = '<br>';
-sub symbol {
-
- debug( 'symbol', @_ ) if RTF_DEBUG > 5;
-
-
+sub symbol {
my $char_props;
if ($START_NEW_PARA) {
$char_props = $_[SELF]->force_char_props('start');
@@ -478,10 +386,6 @@ sub symbol {
# Text
# certainly do the same thing with the char() method
sub text { # parser callback redefinition
-
- debug( 'text', @_ ) if RTF_DEBUG > 5;
-
-
my $text = $_[1];
my $char_props = '';
if ($START_NEW_PARA) {
@@ -500,13 +404,5 @@ sub text { # parser callback redefinition
}
}
-sub debug {
-
- my $function = shift;
-
- print STDERR "[RTF::HTML::Converter::$function]" . (join '|', @_ ) , "\n";
-
-}
-
1;
__END__
@@ -0,0 +1,35 @@
+00 `
+01 ´
+02 ^
+03 ~
+04 ­
+05 ­
+06 °
+07 ¨
+08 ·
+20 --
+30 _
+82 ,
+83 f
+84 ,,
+85 ...
+86 +
+87 ++
+88 ^
+89 0/00
+8a S
+8b <
+8c OE
+91 `
+92 '
+93 ``
+94 ''
+95 ·
+96 -
+97 --
+98 ~
+99 [tm]
+9a s
+9b >
+9c oe
+9f Y
@@ -0,0 +1,183 @@
+exclam !
+quotedbl "
+numbersign #
+dollar $
+percent %
+ampersand &
+quoteright '
+parenleft (
+parenright )
+asterisk *
+plus +
+comma ,
+hyphen -
+period .
+slash /
+zero 0
+one 1
+two 2
+three 3
+four 4
+five 5
+six 6
+seven 7
+eight 8
+nine 9
+colon :
+semicolon ;
+less <
+equal =
+greater >
+question ?
+at @
+bracketleft [
+backslash \
+bracketright ]
+asciicircum ^
+underscore _
+quoteleft `
+braceleft {
+bar |
+braceright }
+asciitilde ~
+OE OE
+acute '
+angleleft [
+angleright >
+approxequal ~
+arrowboth <->
+arrowdblboth <=>
+arrowdblleft <=
+arrowdblright =>
+arrowleft <-
+arrowright ->
+bullet *
+cent ¢
+circumflex ^
+copyright ©
+copyrightsans ©
+dagger +
+degree °
+delta d
+divide ÷
+dotlessi i
+ellipsis ...
+emdash --
+endash -
+fi fi
+fl fl
+fraction /
+grave `
+greaterequal >=
+guillemotleft «
+guillemotright »
+guilsinglleft <
+guilsinglright >
+lessequal <=
+logicalnot ¬
+mathasterisk *
+mathequal =
+mathminus -
+mathnumbersign #
+mathplus +
+mathtilde ~
+minus -
+mu µ
+multiply ×
+nobrkhyphen -
+nobrkspace  
+notequal !=
+oe oe
+onehalf ½
+onequarter ¼
+periodcentered .
+plusminus ±
+quotedblbase ,,
+quotedblleft "
+quotedblright "
+quotesinglbase ,
+registered ®
+registersans ®
+threequarters ¾
+tilde ~
+trademark [tm]
+AE Æ
+Aacute Á
+Acircumflex Â
+Agrave À
+Aring Å
+Atilde Ã
+Adieresis Ä
+Ccedilla Ç
+Eth Ð
+Eacute É
+Ecircumflex Ê
+Egrave È
+Edieresis Ë
+Iacute Í
+Icircumflex Î
+Igrave Ì
+Idieresis Ï
+Ntilde Ñ
+Oacute Ó
+Ocircumflex Ô
+Ograve Ò
+Oslash Ø
+Otilde Õ
+Odieresis Ö
+Thorn Þ
+Uacute Ú
+Ucircumflex Û
+Ugrave Ù
+Udieresis Ü
+Yacute Ý
+ae æ
+aacute á
+acircumflex â
+agrave à
+aring å
+atilde ã
+adieresis ä
+ccedilla ç
+eacute é
+ecircumflex ê
+egrave è
+eth ð
+edieresis ë
+iacute í
+icircumflex î
+igrave ì
+idieresis ï
+ntilde ñ
+oacute ó
+ocircumflex ô
+ograve ò
+oslash ø
+otilde õ
+odieresis ö
+germandbls ß
+thorn þ
+uacute ú
+ucircumflex û
+ugrave ù
+udieresis ü
+yacute ý
+ydieresis ÿ
+newline <br>
+ordfeminine ª
+ordmasculine º
+questiondown ¿
+exclamdown ¡
+section §
+onesuperior ¹
+twosuperior ²
+threesuperior ³
+sterling £
+currency ¤
+yen ¥
+brokenbar ¦
+dieresis ¨
+opthyphen ­
+macron ¯
+paragraph ¶
+cedilla ¸
@@ -1,638 +1,317 @@
-
-# This is a beta release. A lot of this code is hacked to be backwards
-# compatible. You have been warned.
-
-=head1 NAME
-
-RTF::Parser - An event-driven RTF Parser
-
-=head1 DESCRIPTION
-
-An event-driven RTF Parser
-
-=head1 PUBLIC SERVICE ANNOUNCEMENT
-
-This is the third and final (I hope) beta release of RTF::Parser before I
-release a 'production' version (hopefully around Feb 1st 04). I took over
-RTF::Parser from Phillipe Verdret, in a state where it had no documentation.
-I've been working since then on refactoring parts of it, writing tests and
-documentation, but this is still a work in progress. Please bear with me,
-ignore the gaping ommission of tests and documentation for RTF::Control,
-and send me bug reports and suggestions.
-
-=head1 IMPORTANT HINTS
-
-RTF parsing is non-trivial. The inner workings of these modules are somewhat
-scary. You should go and read the 'Introduction' document included with this
-distribution before going any further - it explains how this distribution fits
-together, and is B<vital> reading.
-
-If you just want to convert RTF to HTML or text, from inside your own script,
-jump straight to the docs for L<RTF::HTML::Converter> or L<RTF::TEXT::Converter>
-respectively.
-
-=head1 SUBCLASSING RTF::PARSER
-
-When you subclass RTF::Parser, you'll want to do two things. You'll firstly
-want to overwrite the methods below described as the API. This describes what
-we do when we have tokens that aren't control words (except 'symbols' - see below).
-
-Then you'll want to create a hash that maps control words to code references
-that you want executed. They'll get passed a copy of the RTF::Parser object,
-the name of the control word (say, 'b'), any arguments passed with the control
-word, and then 'start'.
-
-=head2 An example...
-
-The following code removes bold tags from RTF documents, and then spits back
-out RTF.
-
- {
-
- # Create our subclass
-
- package UnboldRTF;
-
- # We'll be doing lots of printing without newlines, so don't buffer output
-
- $|++;
-
- # Subclassing magic...
-
- use RTF::Parser;
- @UnboldRTF::ISA = ( 'RTF::Parser' );
-
- # Redefine the API nicely
-
- sub parse_start { print STDERR "Starting...\n"; }
- sub group_start { print '{' }
- sub group_end { print '}' }
- sub text { print "\n" . $_[1] }
- sub char { print "\\\'$_[1]" }
- sub symbol { print "\\$_[1]" }
- sub parse_end { print STDERR "All done...\n"; }
-
- }
-
- my %do_on_control = (
-
- # What to do when we see any control we don't have
- # a specific action for... In this case, we print it.
-
- '__DEFAULT__' => sub {
-
- my ( $self, $type, $arg ) = @_;
- $arg = "\n" unless defined $arg;
- print "\\$type$arg";
-
- },
-
- # When we come across a bold tag, we just ignore it.
-
- 'b' => sub {},
-
- );
-
- # Grab STDIN...
-
- my $data = join '', (<>);
-
- # Create an instance of the class we created above
-
- my $parser = UnboldRTF->new();
-
- # Prime the object with our control handlers...
-
- $parser->control_definition( \%do_on_control );
-
- # Don't skip undefined destinations...
-
- $parser->dont_skip_destinations(1);
-
- # Start the parsing!
-
- $parser->parse_string( $data );
-
-=head1 METHODS
-
-=cut
+# Sonovision-Itep, Philippe Verdret 1998-1999
+# An event-driven RTF parser
require 5.004;
+use strict;
package RTF::Parser;
-use vars qw($VERSION);
-use strict;
-use Carp;
-use RTF::Tokenizer 1.01;
+$RTF::Parser::VERSION = "1.07";
use RTF::Config;
+use File::Basename;
-$VERSION = '1.09';
-my $DEBUG = 0;
-
-# Debugging stuff I'm leaving in in case someone is using it..,
- use constant PARSER_TRACE => 0;
-
- sub backtrace {
- Carp::confess;
- }
-
- $SIG{'INT'} = \&backtrace if PARSER_TRACE;
- $SIG{__DIE__} = \&backtrace if PARSER_TRACE;
-
-
-=head2 new
-
-Creates a new RTF::Parser object. Doesn't accept any arguments.
-
-=cut
-
-sub new {
-
- # Get the real class name
- my $proto = shift;
- my $class = ref( $proto ) || $proto;
-
- my $self = {};
-
- $self->{_RTF_CONTROL_USED}++ if $INC{'RTF/Control.pm'};
-
- $self->{_DONT_SKIP_DESTINATIONS} = 0;
-
- bless $self, $class;
-
- return $self;
-
+use constant PARSER_TRACE => 0;
+sub backtrace {
+ require Carp;
+ Carp::confess;
}
+$SIG{'INT'} = \&backtrace if PARSER_TRACE;
+$SIG{__DIE__} = \&backtrace if PARSER_TRACE;
+
+# Parser::Generic
+sub parse_stream {
+ my $self = shift;
+ my $stream = shift;
+ my $reader = shift; # eg: parse_stream(\*FH, \&read)
+ my $buffer = '';
-# For backwards compatability, we import RTF::Control's %do_on_control
-# if we've loaded RTF::Control (which would suggest we're being subclassed
-# by RTF::Control). This isn't nice or pretty, but it doesn't break things.
-# I'd do this in new() but there's no guarentee it'll be set by then...
-
-sub _install_do_on_control {
-
- my $self = shift;
-
- return if $self->{_DO_ON_CONTROL};
-
- if ( $self->{_RTF_CONTROL_USED} ) {
-
- $self->{_DO_ON_CONTROL} = \%RTF::Control::do_on_control;
-
+ unless (defined $stream) {
+ die "file not defined";
+ }
+ $self->{Filename} = '';
+ local(*F) = $stream;
+ unless (fileno F) {
+ $self->{Filename} = $stream; # Assume $stream is a filename
+ open(F, $stream) or die "Can't open '$stream' ($!)";
+ }
+ binmode(F);
+ $self->{Filehandle} = \*F;
+ $self->{Eof} = 0;
+ $self->{Buffer} = \$buffer;
+ $self->{If_data_needed} = ref $reader eq 'SUB' ?
+ $reader :
+ sub { # The default reader
+ if ($buffer .= <F>) {
+ 1;
} else {
-
- $self->{_DO_ON_CONTROL} = {};
-
+ $self->{Eof} = 1;
+ 0;
}
-
+ };
+ local *if_data_needed = $self->{If_data_needed};
+ # Now parse the stream
+ $self->if_data_needed() or die "unexpected end of data";
+ $self->parse();
+ close(F) if $self->{Filename} ne '';
+ $self;
}
-
-=head2 parse_stream( \*FH )
-
-This function used to accept a second parameter - a function specifying how
-the filehandle should be read. This is deprecated, because I could find no
-examples of people using it, nor could I see why people might want to use it.
-
-Pass this function a reference to a filehandle (or, now, a filename! yay) to
-begin reading and processing.
-
-=cut
-
-sub parse_stream {
-
- my $self = shift;
- my $stream = shift;
- my $reader = shift;
-
- $self->_install_do_on_control();
-
- die("parse_stream no longer accepts a reader") if $reader;
-
- # Put an appropriately primed RTF::Tokenizer object into our object
- $self->{_TOKENIZER} = RTF::Tokenizer->new( file => $stream );
-
- $self->_parse();
-
- return $self;
-
-}
-
-=head2 parse_string( $string )
-
-Pass this function a string to begin reading and processing.
-
-=cut
-
sub parse_string {
-
- my $self = shift;
- my $string = shift;
-
- $self->_install_do_on_control();
-
- # Put an appropriately primed RTF::Tokenizer object into our object
- $self->{_TOKENIZER} = RTF::Tokenizer->new( string => $string );
-
- $self->_parse();
-
- return $self;
-
+ my $self = shift;
+ my $buffer = $_[0];
+ $self->{Filehandle} = '';
+ $self->{Filename} = '';
+ $self->{Eof} = 0;
+ $self->{If_data_needed} = sub { 0 };
+ local *if_data_needed = $self->{If_data_needed};
+ $self->{Buffer} = \$buffer;
+ $self->parse();
+ $self;
}
-
-=head2 control_definition
-
-The code that's executed when we trigger a control event is kept
-in a hash. We're holding this somewhere in our object. Earlier
-versions would make the assumption we're being subclassed by
-RTF::Control, which isn't something I want to assume. If you are
-using RTF::Control, you don't need to worry about this, because
-we're grabbing %RTF::Control::do_on_control, and using that.
-
-Otherwise, you pass this method a reference to a hash where the keys
-are control words, and the values are coderefs that you want executed.
-This sets all the callbacks... The arguments passed to your coderefs
-are: $self, control word itself (like, say, 'par'), any parameter the
-control word had, and then 'start'.
-
-If you don't pass it a reference, you get back the reference of the
-current control hash we're holding.
-
-=cut
-
-sub control_definition {
-
- my $self = shift;
-
- if (@_) {
-
- if (ref $_[0] eq 'HASH') {
-
- $self->{_DO_ON_CONTROL} = shift;
-
- } else {
-
- die "argument of control_definition() method must be an HASHREF";
-
- }
-
- } else {
-
- return $self->{_DO_ON_CONTROL};
-
- }
-
+sub new {
+ my $receiver = shift; # or something like this
+ my $class = (ref $receiver or $receiver);
+ my $self = bless {
+ Buffer => '', # internal buffer
+ Eof => 0, # 1 if EOF, not used
+ EOR => '', # end of record regex
+ Filename => '', # filename
+ Filehandle => '', #
+ Line => 0, # not used
+ }, $class;
+ $self;
}
-=head2 rtf_control_emulation
-
-If you pass it a boolean argument, it'll set whether or not it thinks RTF::Control
-has been loaded. If you don't pass it an argument, it'll return what it thinks...
-
-=cut
-
-sub rtf_control_emulation {
-
- my $self = shift;
- my $bool = shift;
-
- if ( defined $bool ) {
-
- $self->{_RTF_CONTROL_USED} = $bool;
-
- } else {
-
- return $self->{_RTF_CONTROL_USED};
-
- }
+sub line { $_[1] ? $_[0]->{Line} = $_[1] : $_[0]->{Line} }
+sub filename { $_[1] ? $_[0]->{Filename} = $_[1] : $_[0]->{Filename} }
+sub buffer { $_[1] ? $_[0]->{Buffer} = $_[1] : $_[0]->{Buffer} }
+sub eof { $_[1] ? $_[0]->{Eof} = $_[1] : $_[0]->{Eof} }
+sub eor { $_[1] ? $_[0]->{EOR} = $_[1] : $_[0]->{EOR} }
+sub error { # not used
+ my($self, $message) = @_;
+ my $atline = $.;
+ my $infile = $self->{Filename};
}
-
-=head2 dont_skip_destinations
-
-The RTF spec says that we skip any destinations that we don't have an explicit
-handler for. You could well not want this. Accepts a boolean argument, true
-to process destinations, 0 to skip the ones we don't understand.
-
-=cut
-
-sub dont_skip_destinations {
-
- my $self = shift;
- my $bool = shift;
-
- $self->{_DONT_SKIP_DESTINATIONS} = $bool;
-
+#################################################################################
+# interface must change if you want to write: $self->$1($1, $2);
+# $self->$control($control, $arg, 'start');
+# I'll certainly redefine this in a next release
+my $DO_ON_CONTROL = \%RTF::Control::do_on_control; # default
+sub control_definition {
+ my $self = shift;
+ if (@_) {
+ if (ref $_[0]) {
+ $DO_ON_CONTROL = shift;
+ } else {
+ die "argument of control_definition() method must be an HASHREF";
+ }
+ } else {
+ $DO_ON_CONTROL;
+ }
}
-
-
-# This is how he decided to call control actions. Leaving
-# it to do the right thing at the moment... Users of the
-# module don't need to know our dirty little secret...
-
-{
- package RTF::Action;
- use RTF::Config;
-
- use vars qw($AUTOLOAD);
-
- my $default;
-
- # The original RTF::Parser allowed $LOGFILE to be set
- # that made RTF::Config do fun things. We're allowing it
- # to, but wrapping it up a bit more carefully...
- if ( $LOG_FILE ) {
-
- $default = sub { $RTF::Control::not_processed{$_[1]}++ }
-
- }
-
- my $sub;
-
- sub AUTOLOAD {
-
- my $self = $_[0];
-
- $AUTOLOAD =~ s/^.*:://;
-
- no strict 'refs';
-
- if (defined ($sub = $self->{_DO_ON_CONTROL}->{$AUTOLOAD})) {
-
- # Yuck, empty if. But we're just going to leave it for a while
-
- } else {
-
- if ( $default ) {
-
- $sub = $default
-
- } elsif ( $self->{_DO_ON_CONTROL}->{'__DEFAULT__'} ) {
-
- $sub = $self->{_DO_ON_CONTROL}->{'__DEFAULT__'};
-
- } else {
-
- $sub = sub {};
-
- }
-
- }
-
- # I don't understand why he's using goto here...
- *$AUTOLOAD = $sub;
- goto &$sub;
-
+{ package RTF::Action;
+ use RTF::Config;
+
+ use vars qw($AUTOLOAD);
+ my $default = $LOG_FILE ? # or define a __DEFAULT__ action in %do_on_control
+ sub { $RTF::Control::not_processed{$_[1]}++ } :
+ sub {};
+ my $sub;
+
+ sub AUTOLOAD {
+ #my $self = $_[0];
+ #print STDERR "definition of the '$AUTOLOAD' sub\n";
+
+ $AUTOLOAD =~ s/^.*:://;
+ no strict 'refs';
+ if (defined ($sub = $DO_ON_CONTROL->{"$AUTOLOAD"})) {
+ # Generate on the fly a new method and call it
+ #*{"$AUTOLOAD"} = $sub; &{"$AUTOLOAD"}(@_);
+ # in the OOP style: *{"$AUTOLOAD"} = $sub; $self->$AUTOLOAD(@_);
+ #goto &{*{"$AUTOLOAD"} = $sub};
+ } else {
+ #goto &{*{"$AUTOLOAD"} = $default};
+ $sub = $default;
+ }
+ *$AUTOLOAD = $sub;
+ goto &$sub;
}
-
}
-
-
-
-=head1 API
-
-These are some methods that you're going to want to over-ride if you
-subclass this modules. In general though, people seem to want to subclass
-RTF::Control, which subclasses this module.
-
-=head2 parse_start
-
-Called before we start parsing...
-
-=head2 parse_end
-
-Called when we're finished parsing
-
-=head2 group_start
-
-Called when we encounter an opening {
-
-=head2 group_end
-
-Called when we encounter a closing }
-
-=head2 text
-
-Called when we encounter plain-text. Is given the text as its
-first argument
-
-=head2 char
-
-Called when we encounter a hex-escaped character. The hex characters
-are passed as the first argument.
-
-=head2 symbol
-
-Called when we come across a control character. This is interesting, because,
-I'd have treated these as control words, so, I'm using Philippe's list as control
-words that'll trigger this for you. These are C<-_~:|{}*'\>. This needs to be
-tested.
-
-=head2 bitmap
-
-Called when we come across a command that's talking about a linked bitmap
-file. You're given the file name.
-
-=head2 binary
-
-Called when we have binary data. You get passed it.
-
-=cut
-
+sub DESTROY {}
+#################################################################################
+ # parser's API
sub parse_start {}
sub parse_end {}
sub group_start {}
sub group_end {}
sub text {}
sub char {}
-sub symbol {} # -_~:|{}*'\
-sub bitmap {} # \{bm(?:[clr]|cwd)
+sub symbol {}
+sub bitmap {}
sub binary {}
-# This is the big, bad parse routine that isn't called directly.
-# We loop around RTF::Tokenizer, making event calls when we need to.
-
- sub _parse {
-
- # Read in our object
- my $self = shift;
-
- # Execute any pre-parse subroutines
- $self->parse_start();
-
- # Loop until we find the EOF
- while (1) {
-
- # Read in our initial token
- my ( $token_type, $token_argument, $token_parameter)
- = $self->{_TOKENIZER}->get_token();
-
- # Control words
- if ( $token_type eq 'control' ) {
-
- # We have a special handler for control words
- $self->_control( $token_argument, $token_parameter );
-
- # Plain text
- } elsif ( $token_type eq 'text' ) {
-
- # Send it to the text() routine
- $self->text( $token_argument );
-
- # Groups
- } elsif ( $token_type eq 'group' ) {
-
- # Call the appropriate handler
- $token_argument ?
- $self->group_start :
- $self->group_end;
-
- # EOF
- } else {
-
- last;
-
- }
-
- }
-
- # All done
- $self->parse_end();
- $self;
-
- }
-
-# Control word handler (yeuch)
-# purl, be RTF barbie is <reply>Control words are *HARD*!
- sub _control {
-
- my $self = shift;
- my $type = shift;
- my $arg = shift;
-
- # standard, control_symbols, hex
-
- # Funky destination
- if ( $type eq '*' ) {
-
- # We might actually want to process it...
- if ( $self->{_DONT_SKIP_DESTINATIONS} ) {
+#################################################################################
+ # Parser
+# RTF Specification
+# The delimiter marks the end of the RTF control word, and can
+# be one of the following:
+# 1. a space. In this case, the space is part of the control word
+# 2. a digit or an hyphen, ...
+# 3. any character other than a letter or a digit
+#
+my $CONTROL_WORD = '[a-z]{1,32}'; # or '[a-z]+';
+my $CONTROL_ARG = '(?:\d+|-\d+)'; # argument of control words, or: '-?\d+';
+my $END_OF_CONTROL = '(?:[ ]|(?=[^a-z0-9]))';
+my $CONTROL_SYMBOLS = q![-_~:|{}*\'\\\\]!; # Symbols (Special characters)
+my $DESTINATION = '[*]';
+ # Another possibility: (?:[^\\\\{}]+|\\\\.)+
+ # the following accepts the null string:
+my $DESTINATION_CONTENT = '[^\\\\{}]*(?:\\\\.[^\\\\{}]*)*';
+my $HEXA = q![0-9abcdef][0-9abcdef]!;
+my $PLAINTEXT = '[^{}\\\\\n\r]+';
+my $BITMAP_START = '\\\\{bm(?:[clr]|cwd) '; # Ex.: \{bmcwd
+my $BITMAP_END = q!\\\\}!;
+my $BITMAP_FILE = '(?:[^\\\\{}]+|\\\\[^{}])+';
+
+sub parse {
+ my $self = shift;
+ my $buffer = $self->{Buffer};
+ my $guard = 0;
+
+ unless ($self->{EOR}) { # auto-determination
+ # or if call from parse_file()
+ # read one line and use /\cM$/
+ $self->{EOR} = ($$buffer =~ /\cM/ ? q!\r\n! : q!\n!);
+ }
- $self->_control_execute( '*' );
-
- } else {
-
- # Grab the next token
- my ( $token_type, $token_argument, $token_parameter)
- = $self->{_TOKENIZER}->get_token();
-
- # Basic sanity check
- croak('Malformed RTF - \* not followed by a control...')
- unless $token_type eq 'control';
-
- # Do we have a handler for it?
- if ( defined $self->{_DO_ON_CONTROL}->{$token_argument} ) {
- $self->_control_execute( $token_argument, $token_parameter )
- } else {
- $self->_skip_group();
- $self->group_end();
- }
- }
-
- # Binary data
- } elsif ( $type eq 'bin' ) {
-
- # Grab the next token
- my ( $token_type, $token_argument, $token_parameter)
- = $self->{_TOKENIZER}->get_token();
-
- # Basic sanity check
- croak('Malformed RTF - \bin not followed by text...')
- unless $token_type eq 'text';
-
- # Send it to the handler
- $self->binary( $token_argument );
-
- # Implement a bitmap handler here
-
- # Control symbols
- } elsif ( $type =~ m/[-_~:|{}*\\]/ ) {
-
- # Send it to the handler
- $self->symbol( $type );
-
- # Entity
- } elsif ( $type eq "'" ) {
-
- # Entity handler
- $self->char( $arg );
-
- # Some other control type - give it to the control executer
- } else {
-
- # Pass it to our default executer
- $self->_control_execute( $type, $arg )
-
- }
-
-
- }
-
-# Control word executer (this is nasty)
- sub _control_execute {
-
-
- my $self = shift;
- my $type = shift;
- my $arg = shift;
-
- no strict 'refs';
- &{"RTF::Action::$type"}($self, $type, $arg, 'start');
-
+ $self->parse_start(); # Action before parsing
+ while (1) {
+ $$buffer =~ s/^\\($CONTROL_WORD)($CONTROL_ARG)?$END_OF_CONTROL//o and do {
+ my ($control, $arg) = ($1, $2);
+ no strict 'refs';
+ &{"RTF::Action::$control"}($self, $control, $arg, 'start');
+ next;
+ };
+ $$buffer =~ s/^($PLAINTEXT)//o and do {
+ $self->text($1);
+ next;
+ };
+ $$buffer =~ s/^\{\\$DESTINATION\\(($CONTROL_WORD)($CONTROL_ARG)?)$END_OF_CONTROL//o and do {
+ # RTF Specification: "discard all text up to and including the closing brace"
+ # Example: {\*\controlWord ... }
+ # '\*' is an escaping mechanism
+
+ if (defined $DO_ON_CONTROL->{$2}) { # if it's a registered control then don't skip
+ $$buffer = "\{\\$1" . $$buffer;
+ } else { # skip!
+ my $level = 1;
+ my($control, $arg) = ($2, $3);
+ my $content = "\{\\*\\$1";
+ $self->{Start} = $.; # could be used by the error() method
+ while (1) {
+ $$buffer =~ s/^\{// and do {
+ $content .= "\{";
+ ++$level;
+ next;
+ };
+ $$buffer =~ s/^\}// and do { #
+ $content .= "\}";
+ --$level > 0 ? next : last;
+ };
+ $$buffer =~ s/^($DESTINATION_CONTENT)//o and do {
+ if ($1 ne '') {
+ $content .= $1;
+ next;
+ }
+ };
+ if ($$buffer eq '') {
+ $self->if_data_needed()
+ or die "unexpected end of data: unable to find end of destination";
+ } else {
+ die "unable to analyze '$$buffer' in destination";
+ }
}
-
-# Skip a group
- sub _skip_group {
-
- my $self = shift;
-
- my $level_counter = 1;
-
- while ( $level_counter ) {
-
- # Get a token
- my ( $token_type, $token_argument, $token_parameter)
- = $self->{_TOKENIZER}->get_token();
-
- # Make sure we can't loop forever
- last if $token_type eq 'eof';
-
- # We're in business if it's a group
- if ($token_type eq 'group') {
-
- $token_argument ?
- $level_counter++ :
- $level_counter-- ;
-
- }
-
- }
-
+ no strict 'refs';
+ &{"RTF::Action::*$control"}($self, '*' . "$control", $arg, $content);
+ }
+ next;
+ };
+ $$buffer =~ s/^\{(?!\\[*])// and do { # can't be a destination
+ $self->group_start();
+ next;
+ };
+ $$buffer =~ s/^\}// and do { #
+ $self->group_end();
+ next;
+ };
+ $$buffer =~ s/^$BITMAP_START//o and do { # bitmap filename
+ my $filename;
+ do {
+ $$buffer =~ s/^($BITMAP_FILE)//o;
+ $filename .= $1;
+
+ if ($$buffer eq '') {
+ $self->if_data_needed() or die "unexpected end of data";
}
+ } until ($$buffer =~ s/^$BITMAP_END//o);
+ $self->bitmap($filename);
+ next;
+ };
+ $$buffer =~ s/^\\\'($HEXA)//o and do {
+ $self->char($1);
+ next;
+ };
+ $$buffer =~ s/^\\($CONTROL_SYMBOLS)//o and do {
+ $self->symbol($1);
+ next;
+ };
+ $$buffer =~ s/^$self->{EOR}$//o; # End of line
+ $self->if_data_needed() and next;
+ # can't goes there, except one time at EOF
+ last if $guard++ > 0;
+ }
+ # could be in parse_end()
+ if ($$buffer ne '') {
+ my $data = substr($$buffer, 0, 100);
+ die "unanalized data: '$data ...' at line $. file $self->{Filename}\n";
+ }
+ #
+ $self->parse_end(); # Action after
+ $self;
+}
+sub read { # by line
+ my $self = $_[0];
+ my $FH = $self->{Filehandle};
+ if (${$self->{Buffer}} .= <$FH>) {
+ 1;
+ } else {
+ $self->{Eof} = 1;
+ 0;
+ }
+}
+use constant READ_BIN => 0;
+sub read_bin {
+ my $self = shift;
+ my $length = shift;
+ print STDERR "need to read $length chars\n" if READ_BIN;
+ my $bufref = $self->{Buffer};
+ my $fh = $self->{Filehandle};
+ my $binary = $$bufref . $self->{Strimmed};
+ my $toread = $length - length($binary);
+ print STDERR "data to read: $toread\n" if READ_BIN;
+ if ($toread > 0) {
+ my $n = CORE::read($fh, $binary, $toread, length($binary));
+ print STDERR "binary data: $n chars\n" if READ_BIN;
+ unless ($toread == $n) {
+ die "unable to read binary data\n";
+ }
+ } else {
+ $binary = substr($$bufref, 0, $length);
+ substr($$bufref, 0, $length) = '';
+ }
+ $self->binary($binary); # and call the binary() method
+}
1;
+__END__
-=head1 AUTHOR
-
-Peter Sergeant C<rtf.parser@clueball.com>, originally by Philippe Verdret
-
-=head1 COPYRIGHT
-
-Copyright 2004 B<Pete Sergeant>.
-
-This program is free software; you can redistribute it and/or modify it under
-the same terms as Perl itself.
-
-=head1 CREDITS
-This work was carried out under a grant generously provided by The Perl Foundation -
-give them money!
@@ -1,49 +0,0 @@
-package RTF::TEXT::Converter::ansi;
-
-my @data = (<DATA>);
-chomp(@data);
-
-sub data {
-
- return @data;
-
-}
-
-1;
-
-__DATA__
-00 `
-01 ´
-02 ^
-03 ~
-04 ­
-05 ­
-06 °
-07 ¨
-08 ·
-20 --
-30 _
-82 ,
-83 f
-84 ,,
-85 ...
-86 +
-87 ++
-88 ^
-89 0/00
-8a S
-8b <
-8c OE
-91 `
-92 '
-93 ``
-94 ''
-95 ·
-96 -
-97 --
-98 ~
-99 [tm]
-9a s
-9b >
-9c oe
-9f Y
@@ -1,197 +0,0 @@
-package RTF::TEXT::Converter::charmap;
-
-my @data = (<DATA>);
-chomp(@data);
-
-sub data {
-
- return @data;
-
-}
-
-1;
-
-__DATA__
-exclam !
-quotedbl "
-numbersign #
-dollar $
-percent %
-ampersand &
-quoteright '
-parenleft (
-parenright )
-asterisk *
-plus +
-comma ,
-hyphen -
-period .
-slash /
-zero 0
-one 1
-two 2
-three 3
-four 4
-five 5
-six 6
-seven 7
-eight 8
-nine 9
-colon :
-semicolon ;
-less <
-equal =
-greater >
-question ?
-at @
-bracketleft [
-backslash \
-bracketright ]
-asciicircum ^
-underscore _
-quoteleft `
-braceleft {
-bar |
-braceright }
-asciitilde ~
-OE OE
-acute '
-angleleft [
-angleright >
-approxequal ~
-arrowboth <->
-arrowdblboth <=>
-arrowdblleft <=
-arrowdblright =>
-arrowleft <-
-arrowright ->
-bullet *
-cent ¢
-circumflex ^
-copyright ©
-copyrightsans ©
-dagger +
-degree °
-delta d
-divide ÷
-dotlessi i
-ellipsis ...
-emdash --
-endash -
-fi fi
-fl fl
-fraction /
-grave `
-greaterequal >=
-guillemotleft «
-guillemotright »
-guilsinglleft <
-guilsinglright >
-lessequal <=
-logicalnot ¬
-mathasterisk *
-mathequal =
-mathminus -
-mathnumbersign #
-mathplus +
-mathtilde ~
-minus -
-mu µ
-multiply ×
-nobrkhyphen -
-nobrkspace  
-notequal !=
-oe oe
-onehalf ½
-onequarter ¼
-periodcentered .
-plusminus ±
-quotedblbase ,,
-quotedblleft "
-quotedblright "
-quotesinglbase ,
-registered ®
-registersans ®
-threequarters ¾
-tilde ~
-trademark [tm]
-AE Æ
-Aacute Á
-Acircumflex Â
-Agrave À
-Aring Å
-Atilde Ã
-Adieresis Ä
-Ccedilla Ç
-Eth Ð
-Eacute É
-Ecircumflex Ê
-Egrave È
-Edieresis Ë
-Iacute Í
-Icircumflex Î
-Igrave Ì
-Idieresis Ï
-Ntilde Ñ
-Oacute Ó
-Ocircumflex Ô
-Ograve Ò
-Oslash Ø
-Otilde Õ
-Odieresis Ö
-Thorn Þ
-Uacute Ú
-Ucircumflex Û
-Ugrave Ù
-Udieresis Ü
-Yacute Ý
-ae æ
-aacute á
-acircumflex â
-agrave à
-aring å
-atilde ã
-adieresis ä
-ccedilla ç
-eacute é
-ecircumflex ê
-egrave è
-eth ð
-edieresis ë
-iacute í
-icircumflex î
-igrave ì
-idieresis ï
-ntilde ñ
-oacute ó
-ocircumflex ô
-ograve ò
-oslash ø
-otilde õ
-odieresis ö
-germandbls ß
-thorn þ
-uacute ú
-ucircumflex û
-ugrave ù
-udieresis ü
-yacute ý
-ydieresis ÿ
-newline <br>
-ordfeminine ª
-ordmasculine º
-questiondown ¿
-exclamdown ¡
-section §
-onesuperior ¹
-twosuperior ²
-threesuperior ³
-sterling £
-currency ¤
-yen ¥
-brokenbar ¦
-dieresis ¨
-opthyphen ­
-macron ¯
-paragraph ¶
-cedilla ¸
@@ -3,95 +3,12 @@ use strict;
package RTF::TEXT::Converter;
use RTF::Control;
-use RTF::TEXT::Converter::ansi;
-use RTF::TEXT::Converter::charmap;
-
@RTF::TEXT::Converter::ISA = qw(RTF::Control);
use constant TRACE => 0;
use constant LIST_TRACE => 0;
use constant SHOW_RTF_LINE_NUMBER => 0;
-
-
-=head1 NAME
-
-RTF::TEXT::Converter - Perl extension for converting RTF into text
-
-=head1 DESCRIPTION
-
-Perl extension for converting RTF into text
-
-=head1 SYNOPSIS
-
- use strict;
- use RTF::TEXT::Converter;
-
- my $object = RTF::TEXT::Converter->new(
-
- output => \*STDOUT
-
- );
-
- $object->parse_stream( \*RTF_FILE );
-
-OR
-
- use strict;
- use RTF::TEXT::Converter;
-
- my $object = RTF::TEXT::Converter->new(
-
- output => \$string
-
- );
-
- $object->parse_string( $rtf_data );
-
-=head1 METHODS
-
-=head2 new()
-
-Constructor method. Currently takes one named parameter, C<output>,
-which can either be a reference to a filehandle, or a reference to
-a string. This is where our text output will end up.
-
-=head2 parse_stream()
-
-Read RTF in from a filehandle, and start processing it. Pass me
-a reference to a filehandle.
-
-=head2 parse_string()
-
-Read RTF in from a string, and start processing it. Pass me a string.
-
-=head1 JUST SO YOU KNOW
-
-You can mix-and-match your output and input methods - nothing to stop
-you outputting to a string when you've read from a filehandle...
-
-=head1 AUTHOR
-
-Peter Sergeant C<rtf.parser@clueball.com>, originally by Philippe Verdret
-
-=head1 COPYRIGHT
-
-Copyright 2004 B<Pete Sergeant>.
-
-This program is free software; you can redistribute it and/or modify it under
-the same terms as Perl itself.
-
-=head1 CREDITS
-
-This work was carried out under a grant generously provided by The Perl Foundation -
-give them money!
-
-
-=cut
-
-
-
-
# Symbol exported by the RTF::Ouptut module:
# %info: informations of the {\info ...}
# %par_props: paragraph properties
@@ -106,8 +23,6 @@ give them money!
###########################################################################
my $N = "\n"; # Pretty-printing
-my %charmap_defaults = map({ sprintf("%02x", $_) => chr($_) } (0..255));
-
# you can split on sentences here if you want!!!
# some output parameters
%do_on_event =
@@ -136,7 +51,7 @@ my %charmap_defaults = map({ sprintf("%02x", $_) => chr($_) } (0..255));
},
'par' => sub { # Default rule: if no entry for a paragraph style
# Paragraph styles
- #return output($text) unless $text =~ /\S/;
+ return output($text) unless $text =~ /\S/;
output "$text$N";
},
);
@@ -149,57 +64,34 @@ my %charmap_defaults = map({ sprintf("%02x", $_) => chr($_) } (0..255));
# - method redefinition (could be the purist's solution)
# - $Control::do_on_control{control_word} = sub {};
# - when %do_on_control is exported write:
-
-
-# OK, so a little rewrite has gone on here. I don't like opening 'ansi'
-# and 'char_map' files, so I've wrapped them in RTF::TEXT::ansi.pm, and
-# so on. This makes it an awful lot cleaner, but falls back as
-# appropriate
-
-
$do_on_control{'ansi'} = # callcack redefinition
sub {
-
- my @charmap_data = $_[SELF]->charmap_reader( $_[CONTROL] );
-
- # Create the charset hash...
- my %charset = (
-
- # Defaults...
- %charmap_defaults,
-
- # Specifics from our charset file...
- map({ s/^\s+//; split /\s+/ } @charmap_data )
-
- );
-
- # Over-ride &char to return our character mapping
- local($^W);
- *char = sub {
- output $charset{$_[1]}
- }
-
- };
+ # RTF: \'<hex value>
+ # HTML: &#<dec value>;
+ my $charset = $_[CONTROL];
+ my $charset_file = $_[SELF]->application_dir(__FILE__) . "/$charset";
+ open CHAR_MAP, "$charset_file"
+ or die "unable to open the '$charset_file': $!";
+
+ my %charset = ( # general rule
+ map({ sprintf("%02x", $_) => "&#$_;" } (0..255)),
+ # and some specific defs
+ map({ s/^\s+//; split /\s+/ } (<CHAR_MAP>))
+ );
+ *char = sub {
+ output $charset{$_[1]}
+ }
+ };
# symbol processing
# RTF: \~
# named chars
# RTF: \ldblquote, \rdblquote
-$symbol{'~'} = ' ';
-$symbol{'tab'} = "\t";
+$symbol{'~'} = ' ';
+$symbol{'tab'} = ' ';
$symbol{'ldblquote'} = '"';
$symbol{'rdblquote'} = '"';
$symbol{'line'} = "\n";
-$symbol{'_'} = '-';
-
-# If we get called from a non-ansi document, then we've not redefined
-# char() to something sensible, so we put a nice definition here...
-sub char {
-
- output $charmap_defaults{ $_[1] }
-
-}
-
sub symbol {
if (defined(my $sym = $symbol{$_[1]})) {
output $sym;
@@ -0,0 +1,35 @@
+00 `
+01 ´
+02 ^
+03 ~
+04 ­
+05 ­
+06 °
+07 ¨
+08 ·
+20 --
+30 _
+82 ,
+83 f
+84 ,,
+85 ...
+86 +
+87 ++
+88 ^
+89 0/00
+8a S
+8b <
+8c OE
+91 `
+92 '
+93 ``
+94 ''
+95 ·
+96 -
+97 --
+98 ~
+99 [tm]
+9a s
+9b >
+9c oe
+9f Y
@@ -0,0 +1,183 @@
+exclam !
+quotedbl "
+numbersign #
+dollar $
+percent %
+ampersand &
+quoteright '
+parenleft (
+parenright )
+asterisk *
+plus +
+comma ,
+hyphen -
+period .
+slash /
+zero 0
+one 1
+two 2
+three 3
+four 4
+five 5
+six 6
+seven 7
+eight 8
+nine 9
+colon :
+semicolon ;
+less <
+equal =
+greater >
+question ?
+at @
+bracketleft [
+backslash \
+bracketright ]
+asciicircum ^
+underscore _
+quoteleft `
+braceleft {
+bar |
+braceright }
+asciitilde ~
+OE OE
+acute '
+angleleft [
+angleright >
+approxequal ~
+arrowboth <->
+arrowdblboth <=>
+arrowdblleft <=
+arrowdblright =>
+arrowleft <-
+arrowright ->
+bullet *
+cent ¢
+circumflex ^
+copyright ©
+copyrightsans ©
+dagger +
+degree °
+delta d
+divide ÷
+dotlessi i
+ellipsis ...
+emdash --
+endash -
+fi fi
+fl fl
+fraction /
+grave `
+greaterequal >=
+guillemotleft «
+guillemotright »
+guilsinglleft <
+guilsinglright >
+lessequal <=
+logicalnot ¬
+mathasterisk *
+mathequal =
+mathminus -
+mathnumbersign #
+mathplus +
+mathtilde ~
+minus -
+mu µ
+multiply ×
+nobrkhyphen -
+nobrkspace  
+notequal !=
+oe oe
+onehalf ½
+onequarter ¼
+periodcentered .
+plusminus ±
+quotedblbase ,,
+quotedblleft "
+quotedblright "
+quotesinglbase ,
+registered ®
+registersans ®
+threequarters ¾
+tilde ~
+trademark [tm]
+AE Æ
+Aacute Á
+Acircumflex Â
+Agrave À
+Aring Å
+Atilde Ã
+Adieresis Ä
+Ccedilla Ç
+Eth Ð
+Eacute É
+Ecircumflex Ê
+Egrave È
+Edieresis Ë
+Iacute Í
+Icircumflex Î
+Igrave Ì
+Idieresis Ï
+Ntilde Ñ
+Oacute Ó
+Ocircumflex Ô
+Ograve Ò
+Oslash Ø
+Otilde Õ
+Odieresis Ö
+Thorn Þ
+Uacute Ú
+Ucircumflex Û
+Ugrave Ù
+Udieresis Ü
+Yacute Ý
+ae æ
+aacute á
+acircumflex â
+agrave à
+aring å
+atilde ã
+adieresis ä
+ccedilla ç
+eacute é
+ecircumflex ê
+egrave è
+eth ð
+edieresis ë
+iacute í
+icircumflex î
+igrave ì
+idieresis ï
+ntilde ñ
+oacute ó
+ocircumflex ô
+ograve ò
+oslash ø
+otilde õ
+odieresis ö
+germandbls ß
+thorn þ
+uacute ú
+ucircumflex û
+ugrave ù
+udieresis ü
+yacute ý
+ydieresis ÿ
+newline <br>
+ordfeminine ª
+ordmasculine º
+questiondown ¿
+exclamdown ¡
+section §
+onesuperior ¹
+twosuperior ²
+threesuperior ³
+sterling £
+currency ¤
+yen ¥
+brokenbar ¦
+dieresis ¨
+opthyphen ­
+macron ¯
+paragraph ¶
+cedilla ¸
@@ -0,0 +1,66 @@
+#!/usr/local/bin/perl
+# Sonovision-Itep, Verdret 1995-1999
+
+require 5.004;
+use strict;
+
+my $VERSION = "1.07";
+
+use Getopt::Long;
+use File::Basename;
+
+use vars qw/$BASENAME $DIRNAME/;
+BEGIN {
+ ($BASENAME, $DIRNAME) = fileparse($0);
+}
+use lib "$DIRNAME/lib";
+
+my $usage = "usage:
+ -h print this help
+ -l log_file RTF_file process RTF_file and generate a log file
+ -V print version number
+";
+my $help = "";
+
+use vars qw($EOM $trace);
+$trace = 0;
+$EOM = "\n"; # end of message
+
+use RTF::Config;
+
+die "$usage" unless @ARGV;
+use vars qw($trace $opt_d $opt_h $opt_t $opt_v $opt_V);
+{ local $SIG{__WARN__} = sub {};
+ GetOptions('h', # Help
+ 't=s', # name of the target document
+ 'r=s', # name of the report file
+ 'd', # debugging mode
+ 'v', # verbose
+ 'V', # print version number
+ 'l=s' => \$LOG_FILE, # -l logfile
+ ) or die "$usage$EOM";
+}
+
+if ($opt_h) {
+ print STDOUT "$help\n";
+ exit 0;
+}
+if ($opt_V) {
+ print STDOUT "$VERSION\n";
+ exit 0;
+}
+if ($opt_d) {
+ $| = 1;
+ $EOM = "";
+}
+
+select(STDOUT);
+
+require RTF::HTML::Converter;
+my $self = new RTF::HTML::Converter(Output => \*STDOUT); # actually the default
+
+foreach my $filename (@ARGV) {
+ $self->parse_stream($filename);
+}
+
+1;
@@ -0,0 +1,66 @@
+#!/usr/local/bin/perl
+# Sonovision-Itep, Verdret 1995-1999
+
+require 5.004;
+use strict;
+
+my $VERSION = "1.03";
+
+use Getopt::Long;
+use File::Basename;
+
+use vars qw/$BASENAME $DIRNAME/;
+BEGIN {
+ ($BASENAME, $DIRNAME) = fileparse($0);
+}
+use lib "$DIRNAME/lib";
+
+my $usage = "usage:
+ -h print this help
+ -l log_file RTF_file process RTF_file and generate a log file
+ -V print version number
+";
+my $help = "";
+
+use vars qw($EOM $trace);
+$trace = 0;
+$EOM = "\n"; # end of message
+
+use RTF::Config;
+
+die "$usage" unless @ARGV;
+use vars qw($trace $opt_d $opt_h $opt_t $opt_v $opt_V);
+{ local $SIG{__WARN__} = sub {};
+ GetOptions('h', # Help
+ 't=s', # name of the target document
+ 'r=s', # name of the report file
+ 'd', # debugging mode
+ 'v', # verbose
+ 'V', # print version number
+ 'l=s' => \$LOG_FILE, # -l logfile
+ ) or die "$usage$EOM";
+}
+
+if ($opt_h) {
+ print STDOUT "$help\n";
+ exit 0;
+}
+if ($opt_V) {
+ print STDOUT "$VERSION\n";
+ exit 0;
+}
+if ($opt_d) {
+ $| = 1;
+ $EOM = "";
+}
+
+select(STDOUT);
+
+require RTF::TEXT::Converter;
+my $self = new RTF::TEXT::Converter(Output => \*STDOUT); # actually the default
+
+foreach my $filename (@ARGV) {
+ $self->parse_stream($filename);
+}
+
+1;
@@ -1,20 +0,0 @@
-#!/usr/bin/perl
-
-use strict;
-use RTF::Parser;
-use Test::More tests => 2;
-
-my $object = RTF::Parser->new();
-
-isa_ok( $object, 'RTF::Parser' );
-
-package RTF::SubClassTest;
-@RTF::SubClassTest::ISA = ( 'RTF::Parser' );
-
-package main;
-
-# Check we can be subclassed
-
-my $sub_object = RTF::SubClassTest->new();
-
-isa_ok( $sub_object, 'RTF::SubClassTest' );
\ No newline at end of file
@@ -1,127 +0,0 @@
-#!/usr/bin/perl
-
-# Tests for the RTF::Parser API...
-
- use strict;
- use RTF::Parser;
- use Test::More tests => 22;
-
-# Create a testing subclass...
-
- {
-
- package RTFTest;
-
- # We'll be doing lots of printing without newlines, so don't buffer output
-
- $|++;
-
- # Subclassing magic...
-
- @RTFTest::ISA = ( 'RTF::Parser' );
-
- # Redefine the API in a test-friendly way
-
- sub parse_start { my $self = shift; push (@{ $self->{_TEST_BUFFER} }, 'parse_start'); }
- sub group_start { my $self = shift; push (@{ $self->{_TEST_BUFFER} }, 'group_start'); }
- sub group_end { my $self = shift; push (@{ $self->{_TEST_BUFFER} }, 'group_end'); }
- sub text { my $self = shift; push (@{ $self->{_TEST_BUFFER} }, "text" ); }
- sub char { my $self = shift; push (@{ $self->{_TEST_BUFFER} }, 'char'); }
- sub symbol { my $self = shift; push (@{ $self->{_TEST_BUFFER} }, 'symbol'); }
- sub parse_end { my $self = shift; push (@{ $self->{_TEST_BUFFER} }, 'parse_end'); }
-
- }
-
- my %do_on_control = (
-
- # What to do when we see any control we don't have
- # a specific action for...
-
- '__DEFAULT__' => sub {
-
- },
-
- # Special bold handler
-
- 'b' => sub {
- my $self = shift;
- my $type = shift;
- my $arg = shift;
- push (@{ $self->{_TEST_BUFFER} }, "[$type][$arg]" );
- },
-
- );
-
- # Grab DATA...
-
- my $data = join '', (<DATA>);
-
- # Create an instance of the class we created above
-
- my $parser = RTFTest->new();
- $parser->{_TEST_BUFFER} = [];
-
- # Prime the object with our control handlers...
-
- $parser->control_definition( \%do_on_control );
-
- # Don't skip undefined destinations...
-
- $parser->dont_skip_destinations(1);
-
- # Start the parsing!
-
- $parser->parse_string( $data );
-
- # Check our test buffer
-
- my @actions = @{ content() };
-
- foreach my $buffer ( @{ $parser->{_TEST_BUFFER} } ) {
-
- my $control = shift( @actions );
-
- is( $buffer, $control, "$buffer found" );
-
- }
-
-
-
- sub content {
-
- return [
-
- 'parse_start',
- 'group_start',
- 'group_start',
- 'group_start',
- 'text',
- 'group_end',
- 'group_end',
- 'group_start',
- 'text',
- 'char',
- 'text',
- 'text',
- 'char',
- 'text',
- 'text',
- 'group_end',
- 'symbol',
- '[b][1]',
- 'text',
- '[b][0]',
- 'group_end',
- 'parse_end',
-
- ];
-
- }
-
-__END__
-{\rtf1\ansi\deff0{\fonttbl{\f0 Times New Roman;}}
-{\pard\sb300\li900
- Toc toc Il a ferm\'e9 la porte\line
- Les lys du jardin sont fl\'e9tris\line
- Quel est donc ce mort qu'on emporte
- \par}\_\b1 Tell me it's so :-)\b0}
@@ -1,16 +0,0 @@
-#!/usr/bin/perl
-
-use strict;
-use Test::More tests => 1;
-use RTF::Parser;
-
-{
- local( $^W );
- *RTF::Parser::text = sub { my $self = shift; $self->{_TEST_BUFF} = shift; };
-}
-
-my $parser = RTF::Parser->new( );
-
-$parser->parse_string( 'asdf' );
-
-is( $parser->{_TEST_BUFF}, "asdf", 'Data read from string' );
\ No newline at end of file
@@ -1,26 +0,0 @@
-#!/usr/bin/perl
-
-use strict;
-use Test::More tests => 1;
-use RTF::Parser;
-
-SKIP: {
- eval { require IO::Scalar };
- skip "IO::Scalar not installed", 1 if $@;
-
- my $string = "asdf\n";
-
- my $fh = new IO::Scalar \$string;
-
-{
- local( $^W );
- *RTF::Parser::text = sub { my $self = shift; $self->{_TEST_BUFF} = shift; };
-}
-
- my $parser = RTF::Parser->new( );
-
- $parser->parse_stream( $fh );
-
- is( $parser->{_TEST_BUFF}, "asdf", 'Data read from stream' );
-
-}
\ No newline at end of file
@@ -1,22 +0,0 @@
-#!/usr/bin/perl
-
-use strict;
-use RTF::Parser;
-use Test::More tests => 2;
-
-my $object = RTF::Parser->new();
-
-ok( !($object->control_definition), "No control definitions installed yet");
-
-my $cds = {
-
- b => sub { return 'la' },
- ansi => sub { return 'ta' },
-
-};
-
-
-$object->control_definition( $cds );
-
-ok( eq_hash( ($object->control_definition), $cds), "Control definitions returned correctly");
-
@@ -1,26 +0,0 @@
-#!/usr/bin/perl
-
-use strict;
-use RTF::Parser;
-use Test::More tests => 2;
-
-# First we check that destinations are skipped
-
-my $parser = RTF::Parser->new();
-
-{
- local( $^W );
- *RTF::Parser::text = sub { my $self = shift; $self->{_TEST_BUFF} = shift; };
-}
-
-$parser->parse_string( '{\rtf{\*\asdf Quick Brown}}' );
-
-ok( !$parser->{_TEST_BUFF}, "No text recorded" );
-
-# And then that they are
-
-$parser = RTF::Parser->new();
-$parser->dont_skip_destinations( 1 );
-$parser->parse_string( '{\rtf{\*\asdf Quick Brown}}' );
-
-is( $parser->{_TEST_BUFF}, "Quick Brown", "Text recorded" );
\ No newline at end of file
@@ -1,12 +0,0 @@
-#!/usr/bin/perl
-
-# We're checking that RTF::Control's new method gives us
-# back an RTF::Control object...
-
-use strict;
-use RTF::Control;
-use Test::More tests => 1;
-
-my $object = RTF::Control->new();
-
-isa_ok( $object, 'RTF::Control' );
\ No newline at end of file
@@ -1,54 +0,0 @@
-#!/usr/bin/perl
-
-# RTF::Parser's first unit-test! Yay :-)
-#
-# We're checking that our replacement _configure statment will work
-# in the same way that the original did.
-
-use strict;
-use RTF::Control;
-use Test::More tests => 6;
-
-{
-
- package RTF::TESTSET::ConfigureTests;
- use strict;
- use vars qw( $top_output );
-
- @RTF::TESTSET::ConfigureTests::ISA = ('Exporter', 'RTF::Control');
- @RTF::TESTSET::ConfigureTests::EXPORT = qw( $top_output );
-
-
- $top_output = '123';
-
- # Redefine the set_top_output_to function that would
- # normally be called.
- sub set_top_output_to {
-
- my $self = shift;
- $top_output = shift;
- }
-
-}
-
-my $object = RTF::TESTSET::ConfigureTests->new();
-
-# Check that $top_output is accessible and our default value
-is( $RTF::TESTSET::ConfigureTests::top_output, '123', 'We can check $top_output');
-
-# Try the different config styles...
-$object->_configure( -output => 'answer1' );
-is( $RTF::TESTSET::ConfigureTests::top_output, 'answer1', '-output worked');
-
-$object->_configure( -Output => 'answer2' );
-is( $RTF::TESTSET::ConfigureTests::top_output, 'answer2', '-Output worked');
-
-$object->_configure( output => 'answer3' );
-is( $RTF::TESTSET::ConfigureTests::top_output, 'answer3', 'output worked');
-
-$object->_configure( Output => 'answer4' );
-is( $RTF::TESTSET::ConfigureTests::top_output, 'answer4', 'Output worked');
-
-# Just checking...
-$object->_configure( -atput => 'answer5' );
-is( $RTF::TESTSET::ConfigureTests::top_output, 'answer4', "-atput didn't work (correct behaviour)");
\ No newline at end of file
@@ -1,18 +0,0 @@
-#!/usr/bin/perl
-
-# We're checking that application_dir returns sensibly.
-
-use strict;
-use RTF::Control;
-
-use RTF::TEXT::Converter;
-
-use Test::More tests => 1;
-
-
-{
-
- my $object = RTF::Control->new( -confdir => 'asdfasdf' );
- is( $object->application_dir, 'asdfasdf', '-confdir to set application_dir works' );
-
-}
@@ -1,33 +0,0 @@
-#!/usr/bin/perl
-
-# We're checking that application_dir returns sensibly.
-
-use strict;
-use RTF::TEXT::Converter;
-use RTF::HTML::Converter;
-
-use Test::More tests => 8;
-
-my $text_object = RTF::TEXT::Converter->new( output => \*STDOUT );
-my $html_object = RTF::HTML::Converter->new( output => \*STDOUT );
-
-
-# Test TEXT...
-{
- my @char_map_data = $text_object->charmap_reader('char_map' );
- is( $char_map_data[0], 'exclam !', "Module charmap, first result correct" );
- is( $char_map_data[1], 'quotedbl "', "Module charmap, second result correct" );
- my @ansi_data = $text_object->charmap_reader('ansi' );
- is( $ansi_data[0], '00 ` ', "Module ansi, first result correct" );
- is( $ansi_data[1], '01 ´', "Module ansi, second result correct" );
-}
-
-# Test HTML...
-{
- my @char_map_data = $html_object->charmap_reader('char_map' );
- is( $char_map_data[0], 'exclam !', "Module charmap, first result correct" );
- is( $char_map_data[1], 'quotedbl "', "Module charmap, second result correct" );
- my @ansi_data = $html_object->charmap_reader('ansi' );
- is( $ansi_data[0], '00 ` ', "Module ansi, first result correct" );
- is( $ansi_data[1], '01 ´', "Module ansi, second result correct" );
-}
\ No newline at end of file
diff --git a/var/tmp/source/SARGIE/RTF-Parser-1.09/RTF-Parser-1.09/t/02_control_05_strip_dead_content.t b/var/tmp/source/SARGIE/RTF-Parser-1.09/RTF-Parser-1.09/t/02_control_05_strip_dead_content.t
deleted file mode 100644
index 22ce4d18..00000000
Binary files a/var/tmp/source/SARGIE/RTF-Parser-1.09/RTF-Parser-1.09/t/02_control_05_strip_dead_content.t and /dev/null differ
@@ -1,26 +0,0 @@
-#!/usr/bin/perl
-
-# \ulnone should be treated as \ul0
-# So we're going to throw out a character property
-# event as if it *were* \ul0... We'll test for this
-# by looking for a<\u>b
-
-use strict;
-use Test::More tests => 1;
-use RTF::HTML::Converter;
-
-my $string;
-
-my $object = RTF::HTML::Converter->new(
-
- output => \$string
-
- );
-
-$object->parse_string( q!{\rtf1\ansi\ansicpg1252\deff0\deflang1033{\fonttbl{\f0\fswiss\fcharset0 Arial;}}
-\viewkind4\uc1\pard\ul\f0\fs20 a\ulnone b\par
-}!);
-
-ok( ( $string =~ m!a</u>b! ), '\ulnone treated like \ul0' );
-
-
@@ -1,19 +0,0 @@
-#!/usr/bin/perl
-
-use strict;
-use Test::More tests => 8;
-
-use RTF::HTML::Converter::ansi;
-use RTF::TEXT::Converter::ansi;
-use RTF::HTML::Converter::charmap;
-use RTF::TEXT::Converter::charmap;
-
-ok( (scalar RTF::HTML::Converter::ansi::data) > 1, "RTF::HTML::Converter::ansi returns more than one entry");
-ok( (scalar RTF::TEXT::Converter::ansi::data) > 1, "RTF::TEXT::Converter::ansi returns more than one entry");
-ok( (scalar RTF::HTML::Converter::charmap::data) > 1, "RTF::HTML::Converter::charmap returns more than one entry");
-ok( (scalar RTF::TEXT::Converter::charmap::data) > 1, "RTF::TEXT::Converter::charmap returns more than one entry");
-
-ok( (scalar RTF::HTML::Converter::ansi::data) > 1, "RTF::HTML::Converter::ansi returns more than one entry a second time");
-ok( (scalar RTF::TEXT::Converter::ansi::data) > 1, "RTF::TEXT::Converter::ansi returns more than one entry a second time");
-ok( (scalar RTF::HTML::Converter::charmap::data) > 1, "RTF::HTML::Converter::charmap returns more than one entry a second time");
-ok( (scalar RTF::TEXT::Converter::charmap::data) > 1, "RTF::TEXT::Converter::charmap returns more than one entry a second time");
\ No newline at end of file
@@ -1,17 +0,0 @@
-#!/usr/bin/perl
-
-use strict;
-use RTF::TEXT::Converter;
-use Test::More tests => 1;
-
-my $data = join '', (<DATA>);
-my $output;
-my $object = RTF::TEXT::Converter->new( output => \$output );
-$object->parse_string( $data );
-
-ok( ( $output =~ m/abc... def/ ), "ANSI file read properly, used as appropriate" );
-
-__DATA__
-{\rtf1\ansi\ansicpg1252\deff0\deflang1033{\fonttbl{\f0\fswiss\fcharset0 Arial;}}
-\viewkind4\uc1\pard\ul\f0\fs20 abc\'85 def
-}
@@ -1,73 +0,0 @@
-use RTF::HTML::Converter;
-my $result;
-my $self = new RTF::HTML::Converter(Output => \$result);
-
-my @RTF_Documents = split (/\n/, <<'DATA');
-{} # Ok!
-{\par} # Ok!
-{string\par} # Ok!
-{\b bold \i Bold Italic \i0 Bold again} # Ok!
-{\b bold {\i Bold Italic }Bold again} # Ok!
-{\b bold \i Bold Italic \plain\b Bold again} # Ok!
-DATA
-
-my @HTML_Documents = (<<'D1;',<<'D2;',<<'D3;',<<'D4;',<<'D5;', <<'D6;');
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" []>
-<html>
-<body>
- </body>
-</html>
-D1;
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" []>
-<html>
-<body>
- </body>
-</html>
-D2;
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" []>
-<html>
-<body>
-
-<p>string</p>
- </body>
-</html>
-D3;
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" []>
-<html>
-<body>
-<b>bold <i>Bold Italic </i>Bold again</b> </body>
-</html>
-D4;
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" []>
-<html>
-<body>
-<b>bold <i>Bold Italic </i>Bold again</b> </body>
-</html>
-D5;
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" []>
-<html>
-<body>
-<b>bold <i>Bold Italic </i>Bold again</b> </body>
-</html>
-D6;
-
-print "1..", @RTF_Documents+0, "\n";
-my $test = 0;
-foreach (@RTF_Documents) {
- $test++;
- s/\#.*//;
- $result = '';
- $self->parse_string($_);
- if ($result eq $HTML_Documents[$test-1]) {
- print "ok $test\n";
- } else {
- print STDERR "$_\n";
- print STDERR $HTML_Documents[$test-1], "\n";
- print STDERR "$result\n";
- }
-}
-
-__DATA__
-
-
-
@@ -0,0 +1,73 @@
+use RTF::HTML::Converter;
+my $result;
+my $self = new RTF::HTML::Converter(Output => \$result);
+
+my @RTF_Documents = split (/\n/, <<'DATA');
+{} # Ok!
+{\par} # Ok!
+{string\par} # Ok!
+{\b bold \i Bold Italic \i0 Bold again} # Ok!
+{\b bold {\i Bold Italic }Bold again} # Ok!
+{\b bold \i Bold Italic \plain\b Bold again} # Ok!
+DATA
+
+my @HTML_Documents = (<<'D1;',<<'D2;',<<'D3;',<<'D4;',<<'D5;', <<'D6;');
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" []>
+<html>
+<body>
+ </body>
+</html>
+D1;
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" []>
+<html>
+<body>
+ </body>
+</html>
+D2;
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" []>
+<html>
+<body>
+
+<p>string</p>
+ </body>
+</html>
+D3;
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" []>
+<html>
+<body>
+<b>bold <i>Bold Italic </i>Bold again</b> </body>
+</html>
+D4;
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" []>
+<html>
+<body>
+<b>bold <i>Bold Italic </i>Bold again</b> </body>
+</html>
+D5;
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" []>
+<html>
+<body>
+<b>bold <i>Bold Italic </i>Bold again</b> </body>
+</html>
+D6;
+
+print "1..", @RTF_Documents+0, "\n";
+my $test = 0;
+foreach (@RTF_Documents) {
+ $test++;
+ s/\#.*//;
+ $result = '';
+ $self->parse_string($_);
+ if ($result eq $HTML_Documents[$test-1]) {
+ print "ok $test\n";
+ } else {
+ print STDERR "$_\n";
+ print STDERR $HTML_Documents[$test-1], "\n";
+ print STDERR "$result\n";
+ }
+}
+
+__DATA__
+
+
+