The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.

package Docserver;
use strict;
use Docserver::Config;
use IO::File;
use Fcntl;

BEGIN {
	local $^W = 0;
eval <<'EOF';
	use Win32;
	use Win32::API;
	use Win32::OLE qw(in);
	use Win32::OLE::Const 'Microsoft Office';
	use Win32::OLE::Const 'Microsoft Word';
	use Win32::OLE::Const 'Microsoft Excel';
EOF
}

$Docserver::VERSION = '1.12';

# Placeholder for global error string
use vars qw( $errstr );

# Values for output formats
my %docoutform = (
		'txt' =>	'Text with Layout',
		# 'txt1' =>	wdFormatTextLineBreaks,
		'txt1' =>	wdFormatText,
		'rtf' =>	wdFormatRTF,
		'doc' =>	wdFormatDocument,
		'doc6' =>	'MSWord6Exp',
		'doc95' =>	'MSWord6Exp',
		'html' =>	wdFormatHTML,
		'ps' =>		$Docserver::Config::Config{'ps'},
		'ps1' =>	$Docserver::Config::Config{'ps1'},
		); 
my %xlsoutform = (
		'txt' =>	xlTextPrinter,
		'csv' =>	xlCSV,
		'xls' =>	xlNormal,
		'xls5' =>	xlExcel5,
		'xls95' =>	xlExcel5,
		'html' =>	xlHtml,
		'ps' =>		defined $Docserver::Config::Config{'excel.ps'}
				? $Docserver::Config::Config{'excel.ps'}
				: $Docserver::Config::Config{'ps'},
		'ps1' =>	defined $Docserver::Config::Config{'excel.ps1'}
				? $Docserver::Config::Config{'excel.ps1'}
				: $Docserver::Config::Config{'ps1'},
		);

my $SHEET_SEP = "\n##### List รจ. %s #####\n\n";
my $CSV_SHEET_SEP = "##### Sheet %s #####\n";

# Pro logovani pouzijeme metody Docserver::Srv (zdedene od Net::Daemon pres
# RPC::PlServer). Abychom ale mohli pristoupit k objektu Docserver::Srv, 
# musel byt upraven RPC/PlServer.pm, kde v metody NewHandle bylo
#     my $object = $self->CallMethod($handle, $method, @args);
# zmeneno na
#     my $object = $self->CallMethod($handle, $method, @args, $self);
# - pak se v konstruktoru new tento argument stane hodnotou 'parent_server'
sub Debug {
	my $self = shift;
	if (defined $self->{'parent_server'}) {
		$self->{'parent_server'}->Debug(@_);
	} else {
		warn scalar localtime, " ddebug, ", (sprintf shift, @_), "\n";
	}
}

# Create Docserver object, create temporary directory and create
# and open input (temporary storage) file in binmode.
sub new {
	my $class = shift;
	my $self;
	eval {
		$self = bless {
			'verbose' => 5,
			'parent_server' => shift,
			}, $class;

		my ($dir, $filename)
			= ($Docserver::Config::Config{'tmp_dir'}, 'file.in');
		if (not -d $dir) {
			$self->Debug("Directory `$dir' doesn't exist, will try to create it");
			mkdir $dir, 0666
				or die "Error creating dir `$dir': $!\n";
			die "Directory `$dir' was not created properly\n" if not -d $dir;
		}
		$dir .= '\\'.time.'.'.$$;
		mkdir $dir, 0666 or die "Error creating tmp dir `$dir': $!\n";
		$self->{'dir'} = $dir;

		$self->{'infile'} = $dir.'\\'.$filename;
		$self->Debug("Temporary file is `$self->{'infile'}'");
		($self->{'outfile'} = $self->{'infile'}) =~ s/in$/out/;

		$self->{'fh'} = new IO::File ">$self->{'infile'}"
			or die "Couldn't create file `$self->{'infile'}': $@\n";
		binmode $self->{'fh'};
	};
	if ($@) {
		$errstr = $@;
		return;
	}
	return $self;
}

# Returns error string, either for class or for object.
sub errstr {
	my $ref = shift;
	if (defined $ref and ref $ref) { return $ref->{'errstr'}; }
	return $errstr;
}

# Chooses smaller chunk size -- compares server configuration with
# value that came from client.
sub preferred_chunk_size {
	my ($self, $size) = @_;
	$size = $Docserver::Config::Config{'ChunkSize'}
		if not defined $size or $size > $Docserver::Config::Config{'ChunkSize'};
	$self->Debug("Choosing chunk size `$size'") if $self->{'verbose'};
	$self->{'ChunkSize'} = $size;
	return $size;
}

# Sets the input file length in the object.
sub input_file_length {
	my ($self, $size) = @_;
	if (defined $size) {
		$self->{'input_file_length'} = shift;
		$self->Debug("Setting input file size to `$size'") if $self->{'verbose'};
	}
	$size;
}

# Puts next chunk of data into the input file.
sub put {
	my $self = shift;
	print { $self->{'fh'} } shift;
	1;
}

# Runs the conversion from infile, from in_format to out_format.
sub convert {
	my ($self, $in_format, $out_format) = @_;
	delete $self->{'errstr'};
	$self->Debug("Called convert (`$in_format', `$out_format')")
		if $self->{'verbose'};

	eval {
		if (defined $self->{'fh'}) {
			# Close the input filehandle, no more data coming.
			$self->{'fh'}->close();
			delete $self->{'fh'};
		}
		if ($in_format =~ /doc|rtf|html|txt/) {
			# Run Word conversion.
			if (not defined $docoutform{$out_format}) {
				die "Unsupported output format `$out_format' for Word conversion\n";
			}
			$self->doc_convert($in_format, $out_format);
		}
		elsif ($in_format eq 'xls' or $in_format eq 'csv') {
			# Run Excel conversion.
			if (not defined $xlsoutform{$out_format}) {
				die "Unsupported output format `$out_format' for Excel conversion\n";
			}
			$self->xls_convert($in_format, $out_format);
		}
		else {
			die "Unsupported input format `$in_format'\n";
		}
	};
	if ($@) {
		$self->{'errstr'} = $@;
		$self->Debug("Conversion failed: $@");
	}
	return 1 if not defined $self->{'errstr'};
	return;
}

# Does the whole conversion from Word doc.
sub doc_convert {
	my ($self, $in_format, $out_format) = @_;

	# We will start new Word. It is better than doing
	# GetActiveObject because if the interactive user already has
	# some Word open, he won't see any documents flashing through
	# his screen, and vice versa, he shouldn't be able to spoil
	# our conversion. Starting new Word would be necessary if we
	# wanted the user to be able to kill potential dialog windows.

	my $word = Win32::OLE->new('Word.Application', 'Quit')
		or die Win32::OLE->LastError;

	# Convertors for Text with Layout and to Word95 are optional
	# part of Word installation and as such they don't have any
	# constant wdFormat. They get some integer upon installation
	# and we need to get them this way.

	if (not $out_format =~ /^ps\d*$/
		and not $docoutform{$out_format} =~ /^\d+$/) {
		for my $conv (in $word->FileConverters) {
			$docoutform{$out_format} = $conv->SaveFormat
				if $conv->ClassName eq $docoutform{$out_format};
		}
		if (not $docoutform{$out_format} =~ /^\d+$/) {
			die "Couldn't find converter for format `$docoutform{$out_format}'\n";
		}
		$self->Debug("Found output converter number `$docoutform{$out_format}'");
	}

	# Open method doesn't handle templates (unlike Excel), which cannot be
	# saved as anything else than templates. So it is better to open
	# document based on template -- it works even if we fill it other
	# files than templates. The format is recognized automagically.

	my $doc = $word->Documents->Add({
		'Template' => $self->{'infile'},
		}) or die Win32::OLE->LastError;

	if ($out_format =~ /^ps\d*$/) {
		# Print to file. We have to run it on background
		# so that we don't get some dialog that would allow
		# interactive user to cancel the print.

		my $origback = $word->Options->{PrintBackground};
		my $origprinter = $word->ActivePrinter;

		$word->Options->{PrintBackground} = 1;
		my $printer = $docoutform{$out_format};
		$self->Debug("Setting ActivePrinter to `$printer'");
		$word->{ActivePrinter} = $printer;
		if ($word->{ActivePrinter} ne $printer) {
			$self->Debug("ActivePrinter set to `$word->{ActivePrinter}'");
			die "Setting ActivePrinter to `$printer' failed -- printer not found\n";
		}

		$doc->Activate;
		$word->PrintOut({
			'Range' => wdPrintAllDocument,
			'PrintToFile' => 1, 
			'OutputFileName' => $self->{'outfile'},
			'Copies' => 1
			});
		for (my $i = 0; $i < 60; $i++) {
			sleep 2;
			last unless $word->{BackgroundPrintingStatus};
		}
		$word->Options->{PrintBackground} = $origback;
		$word->{ActivePrinter} = $origprinter;
	} else {
		# The Text with Layout has problems with pictures,
		# probably whenever the picture is in header or footer
		# (it issues error message that saving cannot be
		# finished because access rights are wrong), and
		# sometimes even in normal text when it produces
		# garbage.
		#
		# Because it seems to ignore shapes wich text fields
		# and probably of all types, we'll delete all shapes.
		# We have to delete shapes from header and footer
		# separately ($doc->Shapes won't return them),
		# according to the manual we can take Shapes property
		# from any HeaderFooter object and the returned
		# collection will contain all shapes from all headers
		# and footers.

		if ($out_format eq 'txt') {
			for my $shape (in $doc->Shapes) {
				$shape->Delete;
			}
			for my $shape (in $doc->Sections(1)->Headers(wdHeaderFooterPrimary)->Shapes) {
				$shape->Delete;
			}
		}

		# The normal text convertor (txt1) puts header under
		# the normal text.

		# If the original document
		# containg page numbers in headers or footers, the
		# Text with Layout puts to the beginning of the output
		# (and the normal text converter to the end of the
		# output) some number, usually number of the first
		# page, but the following pages are not numbered.
		# That's why we'll remove all page numbers,
		# unfortunately there doesn't seem to be any better
		# way than walking through all combinations of header
		# and footer and constants WdHeaderFooterIndex.

		if ($out_format =~ /^txt1?$/) {
			for my $section (in $doc->Sections) {
				for my $pagenumber (
					in $section->Footers(wdHeaderFooterPrimary)->PageNumbers,
					in $section->Headers(wdHeaderFooterPrimary)->PageNumbers,
					in $section->Footers(wdHeaderFooterEvenPages)->PageNumbers,
					in $section->Headers(wdHeaderFooterEvenPages)->PageNumbers,
					in $section->Footers(wdHeaderFooterFirstPage)->PageNumbers,
					in $section->Headers(wdHeaderFooterFirstPage)->PageNumbers
					) {
					$pagenumber->Delete;
				}
			}
		}

		$doc->SaveAs({
			'FileName' => $self->{'outfile'},
			'FileFormat' => $docoutform{$out_format}
			});
	}
	$doc->Close({
		'SaveChanges' => wdDoNotSaveChanges,
		});
}
		
sub xls_convert
	{
	my ($self, $in_format, $out_format) = @_;

	my $excel = Win32::OLE->new('Excel.Application', 'Quit')
		or die Win32::OLE->LastError;

	my $wrk = $excel->Workbooks->Open({
		'FileName' => $self->{'infile'},
		($in_format eq 'csv' ? ('Format' => 4) : ()),
		}) or die Win32::OLE->LastError;

	# We'll set nice name of the sheet if the input comes from CSV
	# to have reasonable name for output to PS or XLS.
	$wrk->Sheets(1)->{'Name'} = 'Sheet1' if $in_format eq 'csv';

	if ($out_format =~ /^(xls(95)?|txt)$/) {
		$wrk->SaveAs({
			'FileName' => $self->{'outfile'},
			'FileFormat' => $xlsoutform{$out_format}
			});

	} elsif ($out_format =~ /^ps\d?$/) {
		# It seems like Excel cannot do background printing
		# like Word can. Fortunately the dialog box is not
		# active so it cannot be hit by accident.
		$excel->{ActivePrinter} = $xlsoutform{$out_format};
		$wrk->Activate;
		$wrk->PrintOut({
			'PrintToFile' => 1, 
			'PrToFileName' => $self->{'outfile'},
			});

	} elsif ($out_format eq 'csv') {
		open FILEOUT, "> $self->{'outfile'}" or die "Error writing $self->{'outfile'}: $!";
		binmode FILEOUT;
		for my $i (1 .. $wrk->Sheets->Count) {
			$wrk->Sheets($i)->SaveAs({
				'FileName' => "$self->{'outfile'}$i",
				'FileFormat' => $xlsoutform{$out_format},
			});
			printf FILEOUT $CSV_SHEET_SEP, $i if $i > 1;
			open IN, "$self->{'outfile'}$i";
			binmode IN;
			while (<IN>) {
				print FILEOUT;
			}
			close IN;
		}
		close FILEOUT;
	
	} elsif ($out_format eq 'html') {
		for my $sheet (in $wrk->Sheets) {
			$wrk->PublishObjects->Add({
				'SourceType' => xlSourceSheet,
				'Filename' => $self->{'outfile'},
				'Sheet' => $sheet->Name,
				'HtmlType' => xlHtmlStatic,
				})->Publish({
					'Create' => 0,
					});
		}	
	}

	$wrk->Close({
		'SaveChanges' => 0
		});
	opendir DIR, $self->{'dir'};
	map unlink("$self->{'dir'}/$_") || warn("$_ $!\n"), grep /out\d+$/, readdir DIR;
	closedir DIR;
}


sub result_length {
	my $self = shift;
	return -s $self->{'outfile'};
}

# Returns next piece of output file.
sub get {
	my ($self, $len) = @_;
	my $fh = $self->{'outfh'};
	if (not defined $fh) {
		$fh = $self->{'outfh'} = new IO::File($self->{'outfile'});
		binmode $fh;
	}
	my $buffer;
	read $fh, $buffer, $len;
	$buffer;
}

sub finished {
	my $self = shift;
	close delete $self->{'fh'} if defined $self->{'fh'};
	close delete $self->{'outfh'} if defined $self->{'outfh'};
	unlink delete $self->{'infile'} if defined $self->{'infile'};
	unlink delete $self->{'outfile'} if defined $self->{'outfile'};
	rmdir delete $self->{'dir'} if defined $self->{'dir'};
}

sub DESTROY {
	shift->finished;
}

sub server_version {
	return $Docserver::VERSION;
}

1;

=head1 NAME

Docserver.pm - server module for remote MS format conversions

=head1 AUTHOR

(c) 1998--2002 Jan Pazdziora, adelton@fi.muni.cz,
http://www.fi.muni.cz/~adelton/ at Faculty of Informatics, Masaryk
University in Brno, Czech Republic.

Pavel Smerk added support for more formats and also did the error
and Windows handling.

=cut