The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
package oEdtk::EDMS;
# Electronic Document Management (GED in french)
use strict;
use warnings;

use Exporter;
our $VERSION	= 0.8035;
our @ISA	= qw(Exporter);
our @EXPORT_OK	= qw(
			EDMS_edidx_build
			EDMS_edidx_write
			EDMS_idldoc_seqpg
			EDMS_idx_create_csv
			EDMS_import
			EDMS_package
			EDMS_prepare
			EDMS_process
			EDMS_process_zip
		);

# use File::Temp	qw(tempdir);
use Archive::Zip	qw(:ERROR_CODES);
use Cwd;
use File::Basename;
use File::Copy;
use Net::FTP;
use oEdtk::Main;
use oEdtk::Config	qw(config_read);
use oEdtk::DBAdmin	qw(@INDEX_COLS);
use POSIX		qw(strftime);
use Text::CSV;
use XML::Writer;

# use PDF;		# ajouter dans les prerequis


# Utility function to construct filenames.
sub EDMS_idldoc_seqpg($$) {
	my ($idldoc, $page) = @_;

	# Modifié suite au problème de . dans le nom de fichier pour Docubase
	# $idldoc =~ s/\./_/;
	return sprintf("${idldoc}_%07d", $page);
}

# Package a DOC along with its index in a zip archive for later processing.
sub EDMS_prepare($$$$) {
	my $app	= shift;
	my $idldoc= shift;
	my $doc_path=shift;
	my $idx_path=shift;
	my $doc = "$app.$idldoc.pdf";

	my $cfg = config_read('EDOCMNGR');
	my $zip = Archive::Zip->new();
	$zip->addFile($doc_path, $doc);
	$zip->addFile($idx_path, basename($idx_path));

	my $zipfile = "$cfg->{'EDTK_DIR_EDOCMNGR'}/$app.$idldoc.out.zip";
	die "ERROR: Could not create zip achive \"$zipfile\"\n"
	    unless $zip->writeToFileNamed($zipfile) == AZ_OK;
	print "$zipfile\n";

return 1;
}


# Package some documents along with one index in a zip archive.
sub EDMS_package($$@) {
	my $app	= shift;
	my $idldoc= shift;
	my @elements=@_;

	my $cfg = config_read('EDOCMNGR');
	my $zip = Archive::Zip->new();

	foreach (@elements){
		$zip->addFile($_, basename($_));
	}

	my $zipfile = "$cfg->{'EDTK_DIR_EDOCMNGR'}/$app.$idldoc.out.zip";
	die "ERROR: Could not create zip achive \"$zipfile\"\n"
	    unless $zip->writeToFileNamed($zipfile) == AZ_OK;
	print "$zipfile\n";

return 1;
}


sub EDMS_process_zip($;$) {
	my ($zipfile, $outdir) = @_;

	my $zipname = basename($zipfile);
	if ($zipname !~ /^([^.]+)\.(.+)\.out\.zip$/) {
		die "ERROR: Unexpected zip filename: $zipname\n";
	}
	my ($app, $idldoc) = ($1, $2);

	my $zip = Archive::Zip->new();
	if ($zip->read($zipfile) != AZ_OK) {
		die "ERROR: Could not read zip archive \"$zipfile\"\n";
	}

	my @files = $zip->members();
	my ($idx_member) = $zip->membersMatching('\.idx1$');
	my ($doc_member) = $zip->membersMatching('\.pdf$');
		if (!defined($doc_member)){
			($doc_member) = $zip->membersMatching('\.xls$');
		}
		if (!defined($doc_member)){
			($doc_member) = $zip->membersMatching('\.doc$');
		}
	if (!defined($doc_member) || !defined($idx_member)) {
		die "ERROR: Could not find document(s) or index file in archive\n";
	}
	my $doc_name = $doc_member->fileName();
	my $idx_name = $idx_member->fileName();
	my $doc_path = $doc_name;
	my $idx_path = $idx_name;
	if (defined($outdir)) {
		$doc_path = "$outdir/$doc_path";
		$idx_path = "$outdir/$idx_path";
	}
	warn "INFO : Extracting file \"$doc_name\"\n";
	if ($zip->extractMember($doc_member, $doc_path) != AZ_OK) {
		die "ERROR: Could not extract \"$doc_name\" from archive\n";
	}
	warn "INFO : Extracting file \"$idx_name\"\n";
	if ($zip->extractMember($idx_member, $idx_path) != AZ_OK) {
		die "ERROR: Could not extract \"$idx_name\" from archive\n";
	}

return EDMS_process($app, $idldoc, $doc_name, $idx_name, $outdir);
}


# Process document(s) with its index in a way suitable for the edms software.
sub EDMS_process($$$$;$) {
	my ($app, $idldoc, $doc, $index, $outdir) = @_;
	# Remplace les - et les . par des _ car Docubase ne peut pas importer de fichier comprenant des . dans leur nom
	$idldoc	=~ s/[-\.]/_/g;
	$app		=~ s/[-\.]/_/g;

	my $cfg = config_read('EDOCMNGR');
	my $format  = $cfg->{'EDMS_IDX_FORMAT'};
	my @edmscols = split(/,/, $cfg->{'EDMS_INDEX_COLS'});

	my $oldcwd;
	if (defined($outdir)) {
		$oldcwd = getcwd();
		chdir($outdir)
		    or die "ERROR: Cannot change current directory to \"$outdir\": $!\n";
	}
	my @outfiles = ();

	if ($doc =~ /pdf$/i){
		warn "INFO : Splitting $doc into individual docs...\n";

		## gs -sDEVICE=pdfwrite \
		##   -q -dNOPAUSE -dBATCH \
		##   -sOutputFile=sample-1.pdf \
		##   -dFirstPage=1 \
		##   -dLastPage=1 \
		##   FAX200904010240-1.PDF
		#my $this_pdf = PDF->new;
		#$this_pdf = PDF->new($doc);

		#my $output = "${app}_${idldoc}_%07d.pdf";
		#my $gs = system ($cfg->{'EDMS_BIN_GS'} . " -q -dNOPAUSE -dBATCH -sDEVICE=pdfwrite -dFirstPage=1 -dLastPage=". $this_pdf->Pages ." -sOutputFile=$output $doc ");
		#if ($gs != 0) {
		#	die "ERROR: Could not split pages from $doc to $output !\n";
		#}

		# Modifié suite au problème des points dans les noms de fichiers pour docubase
		my $rv = system($cfg->{'EDMS_BIN_PDFTK'} . " $doc burst output ${app}_${idldoc}_%07d.pdf ");

		if ($rv != 0) {
			die "ERROR: Could not burst PDF file $doc!\n";
		}

	} else {
		#warn "DEBUG: document $doc is not pdf file\n";
		my $cible = _docubase_file_name($doc);
		move ($doc, $cible) or die "ERROR: echec move $cible ($doc)\n";
		push (@outfiles, $cible);
	}

	if ($format eq 'DOCUBASE') {
		@outfiles = EDMS_idx_create_csv($cfg, $index, $app, $idldoc, \@edmscols);
	} elsif ($format eq 'SCOPMASTER') {
		@outfiles = EDMS_idx_create_xml($cfg, $index, $app, $idldoc, \@edmscols);
	} else {
		die "ERROR: Unexpected index format: $format\n";
	}

	if ($cfg->{'EDTK_TYPE_ENV'} ne 'Test') {
		unlink($doc) if ($doc =~ /pdf$/i);
		unlink($index);
		unlink('doc_data.txt');		# pdftk creates this one.
	}

	if (defined($outdir)) {
		# Restore original working directory.
		chdir($oldcwd);
	}

return @outfiles;
}


# TRANSFER THE PDF FILES AND THE INDEX TO edms APPLICATION.
sub EDMS_import($@) {
	my ($index, @docs) = @_;

	my $cfg = config_read('EDOCMNGR');
	warn "INFO : Connection to edms FTP server $cfg->{'EDMS_FTP_HOST'}:$cfg->{'EDMS_FTP_PORT'}\n";
	my $ftp = Net::FTP->new($cfg->{'EDMS_FTP_HOST'}, Port => $cfg->{'EDMS_FTP_PORT'})
	    or die "ERROR: Cannot connect to $cfg->{'EDMS_FTP_HOST'}: $@\n";
	$ftp->login($cfg->{'EDMS_FTP_USER'}, $cfg->{'EDMS_FTP_PASS'})
	    or die "ERROR: Cannot login: " . $ftp->message() . "\n";
	$ftp->binary()
	    or die "ERROR: Cannot set binary mode: " . $ftp->message() . "\n";
	$ftp->cwd($cfg->{'EDMS_FTP_DIR_DOCS'})
	    or die "ERROR: Cannot change working directory: " . $ftp->message() . "\n";

	# It is important to transfer the edms APPLICATION index file last, otherwise
	# the PDF files that haven't been transferred yet will not be processed.
	foreach my $doc (@docs) {
		warn "INFO : Uploading DOC file $doc\n";
		$ftp->put($doc)
		    or die "ERROR: Cannot upload DOC file : " . $ftp->message() . "\n";
	}
	warn "INFO : Uploading index file $index\n";
	$ftp->cwd()
	    or die "ERROR: Cannot change working directory : " . $ftp->message() . "\n";
	$ftp->cwd($cfg->{'EDMS_FTP_DIR_IDX'})
	    or die "ERROR: Cannot change working directory : " . $ftp->message() . "\n";
	$ftp->put($index)
	    or die "ERROR: Cannot upload index file : " . $ftp->message() . "\n";
	$ftp->quit();
}

# READ THE INITIAL INDEX FILE, AND CALL THE GIVEN FUNCTION FOR EACH NEW
# DOCUMENT. ALSO CONCATENATE PDF FILES IF NEEDED (FOR MULTI-PAGES DOCUMENTS).
sub EDMS_idx_process($$$$&) {
	my ($app, $idx, $idldoc, $keys, $sub) = @_;

	my @idxcols = map { $$_[0] } @INDEX_COLS[0..28]; # il faudrait peut être pousser jusqu'à 30 (ED_CODRUPT) voir plus

	open(my $fh, '<', $idx) or die "ERROR: Cannot open \"$idx\": $!\n";
	my $csv = Text::CSV->new({ binary => 1, sep_char => ';' });
	$csv->column_names(@idxcols);
	my $lastdoc = 0;
	my $firstpg = 0;
	my $numpgs  = 1;
	my %docvals = ();
	my $vals;

	while ($vals = $csv->getline_hr($fh)) {
		if ($vals->{'ED_SEQDOC'} != $lastdoc) {
			if ($lastdoc != 0) {
				EDMS_merge_docs($app, $idldoc, $firstpg, $numpgs);
				$sub->(\%docvals, $firstpg, $numpgs);
				undef (%docvals);
			}
			$lastdoc = $vals->{'ED_SEQDOC'};
			# Remember the values we are interested in for the edms.
			foreach (@$keys) {
				$docvals{$_} = $vals->{$_};
			}
			$docvals{'ED_DOCLIB'} = $vals->{'ED_DOCLIB'};
			$firstpg = $vals->{'ED_IDSEQPG'};
			$numpgs  = 1;

		} else {
			# Remember the values we are interested in for the edms.
			foreach (@$keys) {
				$docvals{$_} = $vals->{$_} if $vals->{$_};
			}
			$docvals{'ED_DOCLIB'} = $vals->{'ED_DOCLIB'};
			$numpgs++;
		}
	}

	# Handle the last document.
	if ($lastdoc != 0) {
		EDMS_merge_docs($app, $idldoc, $firstpg, $numpgs);
		$sub->(\%docvals, $firstpg, $numpgs);
	}
	close($fh);
}


sub _docubase_file_name($){
	my $filename = shift;

	$filename =~s/(^.*)(\.\w{2,4}$)/$1/;
	my $ext	= $2 || "";
	$filename =~s/[-\.]/_/g;
	$filename .= $ext;

return $filename;
}


# CREATE A EDMS INDEX FILE IN CSV FORMAT (FOR EDMS APPLICATION).
sub EDMS_idx_create_csv($$$$$) {
	my ($cfg, $idx, $app, $idldoc, $keys) = @_;

	my $csv = Text::CSV->new({ binary => 1, sep_char => ';', eol => "\n", quote_space => 0 });
	my $edmsidx = "${app}_$idldoc.idx";
	open(my $fh, '>', $edmsidx) or die "ERROR: Cannot create \"$edmsidx\": $!\n";

	# Always return the index file as the first file in the list, see
	# EDMS_import() for why this is important.
	my @outfiles = ($edmsidx);
	EDMS_idx_process($app, $idx, $idldoc, $keys, sub {
		my ($vals, $firstpg, $numpgs) = @_;

		if ($vals->{'ED_DOCLIB'} =~ /pdf$/i) {
			$vals->{'EDMS_IDLDOC_SEQPG'} = EDMS_idldoc_seqpg($idldoc, $firstpg);
			$vals->{'EDMS_FILENAME'}  = "${app}_". $vals->{'EDMS_IDLDOC_SEQPG'} .".pdf";
		} else {
			$vals->{'EDMS_FILENAME'}  = _docubase_file_name($vals->{'ED_DOCLIB'});
		}

		# Dates need to be in a specific format.
		my $datefmt = $cfg->{'EDMS_DATE_FORMAT'};
		if ($vals->{'ED_DTEDTION'} !~ /^(\d{4})(\d{2})(\d{2})$/) {
			die "ERROR: Unexpected date format for ED_DTEDTION: $vals->{'ED_DTEDTION'}\n";
		}
		my ($year, $month, $day) = ($1, $2, $3);
		$vals->{'EDMS_PROCESS_DT'} = strftime($datefmt, 0, 0, 0, $day, $month - 1, $year - 1900);

		# owner id for group acces in edms
		# la règle de gestion ne devrait pas etre ici, à faire évoluer
		if ($vals->{'ED_IDEMET'} =~/^\D{1}\d{3}/) {
			$vals->{'ED_OWNER'} = $vals->{'ED_IDEMET'};
		} else {
			$vals->{'ED_OWNER'} = $vals->{'ED_SOURCE'};
		}

		my @edmsvals = map { $vals->{$_} } @$keys;
		$csv->print($fh, \@edmsvals);

		push(@outfiles, $vals->{'EDMS_FILENAME'});
	});
	close($fh);
	return @outfiles;
}

# Create edms indexes in XML format (one per PDF file).
sub EDMS_idx_create_xml($$$$$) {
	my ($cfg, $idx, $app, $idldoc, $keys) = @_;

	my @outfiles = ();
	EDMS_idx_process($app, $idx, $idldoc, $keys, sub {
		my ($vals, $firstpg, $numpgs) = @_;

		my $docid = EDMS_idldoc_seqpg($idldoc, $firstpg);
		my $xmlfile = "$docid.edms.xml";
		$vals->{'ED_DOCLIB'} =~ /\.(\w{2,4})$/;
		my $ext = $1;

		open(my $fh, '>', $xmlfile) or die "ERROR: Cannot create \"$xmlfile\": $!\n";
		my $xml = XML::Writer->new(OUTPUT => $fh, ENCODING => 'utf-8');
		$xml->xmlDecl('utf-8');
		$xml->startTag('idxext');

		foreach my $pagenum (1..$numpgs) {
			$xml->startTag('page', num => $pagenum);
			if ($pagenum == 1) {
				while (my ($key,$val) = each(%$vals)) {
					$xml->emptyTag('index', key => $key, value => $val);
				}
			}
			$xml->endTag('page');
		}
		$xml->endTag('idxext');
		$xml->end();
		close($fh);

		push(@outfiles, $xmlfile);
		push(@outfiles, "${app}_$docid.$ext");
	});
	return @outfiles;
}

# Concatenate PDF documents if needed.
sub EDMS_merge_docs($$$$) {
	my ($app, $idldoc, $firstpg, $numpgs, $optimizer) = @_;
	my $cfg = config_read('EDOCMNGR'); # , $cfg->{'EDMS_PDF_OPTIMIZER'}

	# If the document is only one page long, there is nothing to concatenate.
	return unless $numpgs > 1;

	my $lastpg = $firstpg + $numpgs - 1;
	my @pages  = map { "${app}_" . EDMS_idldoc_seqpg($idldoc, $_) . ".pdf" } ($firstpg .. $lastpg);
	warn "INFO : Concatenating pages $firstpg to $lastpg into $pages[0]\n";
	my $output = "$pages[0].tmp";

	if (defined $cfg->{'EDMS_BIN_GS'} && $cfg->{'EDMS_BIN_GS'} ne "") {
		#  les pdf créés avec pdftk sont trop lourds, changement de mode opératoire ...
		my $gs = system ($cfg->{'EDMS_BIN_GS'} . " -q -dNOPAUSE -dBATCH -sDEVICE=pdfwrite -sOutputFile=$output @pages ");
		if ($gs != 0) {
			die "ERROR: Could not concatenate pages $firstpg to $lastpg!\n";
		}
	} else {
		my $rv = system($cfg->{'EDMS_BIN_PDFTK'} . " " . join(' ', @pages) . " cat output $output");
		if ($rv != 0) {
			die "ERROR: Could not concatenate pages $firstpg to $lastpg!\n";
		}
	}

	# Now, remove old files, and rename concatenated PDF to the name of
	# the PDF file of the first page.
	foreach (@pages) {
		unlink($_);
	}
	move($output, $pages[0]);
}


sub EDMS_edidx_build (\%){
	my ($refOpt) 	= @_;
	my $cfg 	= config_read('EDOCMNGR');

# EDMS_INDEX_COLS		=ED_REFIDDOC,ED_CORP,ED_SOURCE,EDMS_IDLDOC_SEQPG,ED_DTEDTION,ED_CLEGED1,ED_IDDEST,ED_NOMDEST,ED_VILLDEST,ED_IDEMET,ED_CLEGED2,ED_CLEGED3,ED_CLEGED4,ED_OWNER,EDMS_FILENAME
# clefs d'index requises 	: ED_REFIDDOC, ED_CORP, ED_SOURCE, ED_IDDEST, ED_NOMDEST, ED_IDEMET, ED_OWNER, ED_CORP
# clefs optionnelles		: ED_DTEDTION, ED_CLEGED1, ED_VILLDEST, ED_CLEGED2, ED_CLEGED3, ED_CLEGED4
# clefs (re)calculées		: ED_DTEDTION, EDMS_IDLDOC_SEQPG, EDMS_FILENAME

	# REQUIRED KEYS
	if (!defined $$refOpt{'ED_REFIDDOC'} or $$refOpt{'ED_REFIDDOC'} eq ""){
		die "ERROR: ED_REFIDDOC required.\n";
	}
	if (!defined $$refOpt{'ED_SOURCE'} or $$refOpt{'ED_SOURCE'} eq ""){
		die "ERROR: ED_SOURCE required.\n";
	}
	if (!defined $$refOpt{'ED_IDDEST'} or $$refOpt{'ED_IDDEST'} eq ""){
		die "ERROR: ED_IDDEST required.\n";
	}
	if (!defined $$refOpt{'ED_NOMDEST'} or $$refOpt{'ED_NOMDEST'} eq ""){
		die "ERROR: ED_NOMDEST required.\n";
	}
	if (!defined $$refOpt{'ED_IDEMET'} or $$refOpt{'ED_IDEMET'} eq ""){
		die "ERROR: ED_IDEMET required.\n";
	}
	if (!defined $$refOpt{'ED_OWNER'} or $$refOpt{'ED_OWNER'} eq ""){
		die "ERROR: ED_OWNER required.\n";
	}
	if (!defined $$refOpt{'ED_CORP'} or $$refOpt{'ED_CORP'} eq ""){
		die "ERROR: ED_CORP required.\n";
	}


	# COMPUTED KEYS
	my $FILE_EXT	= $$refOpt{'ED_FILENAME'}; #= $req->upload('EDMS_FILENAME');
	$FILE_EXT		=~s/^(.*\.)(\w+)$/$2/;
	$$refOpt{'ED_FORMFLUX'}	= uc ($FILE_EXT);

	my ($sec,$min,$hour,$day,$month,$year);
	if (!defined $$refOpt{'ED_DTEDTION'} || $$refOpt{'ED_DTEDTION'} !~ /^(\d{4})(\d{2})(\d{2})$/) {
		#die "ERROR: Unexpected date format for ED_DTEDTION: $$refOpt{'ED_DTEDTION'}\n";
		($sec,$min,$hour,$day,$month,$year) = localtime();
		$month ++;
		$year += 1900;
	} else {
		($year, $month, $day) = ($1, $2, $3);
	}

	# DATES NEED TO BE IN A SPECIFIC FORMAT.
	my $datefmt = $cfg->{'EDMS_DATE_FORMAT'};
	$$refOpt{'ED_DTEDTION'}		= strftime($datefmt, 0, 0, 0, $day, $month - 1, $year - 1900);
	$$refOpt{'ED_IDLDOC'}		= oEdtk::Main::oe_ID_LDOC();
	$$refOpt{'ED_IDSEQPG'}		= 1;
	$$refOpt{'ED_SEQDOC'}		= 1;
	$$refOpt{'EDMS_IDLDOC_SEQPG'} 	= EDMS_idldoc_seqpg($$refOpt{'ED_IDLDOC'}, $$refOpt{'ED_IDSEQPG'});
	$$refOpt{'EDMS_FILENAME'} 	= $$refOpt{'ED_REFIDDOC'} . "_" .$$refOpt{'EDMS_IDLDOC_SEQPG'};
	$$refOpt{'EDMS_FILENAME'} 	=~s/[-\.\s]/_/g;
	$$refOpt{'EDMS_FILENAME'} 	= $$refOpt{'EDMS_FILENAME'} . "." . $FILE_EXT ;
	$$refOpt{'ED_DOCLIB'}		= $$refOpt{'EDMS_FILENAME'};

	# OPTIONNAL KEYS
	$$refOpt{'ED_VILLDEST'}|= "";
	$$refOpt{'ED_CLEGED1'} |= "";
	$$refOpt{'ED_CLEGED2'} |= "";
	$$refOpt{'ED_CLEGED3'} |= "";
	$$refOpt{'ED_CLEGED4'} |= "";
}


sub EDMS_edidx_write (\%) {
	my ($refOpt)	= shift;
	my $cfg		= config_read('EDOCMNGR');
	my @edms_cols 	= split(/,/, $cfg->{'EDMS_INDEX_COLS'});
	my $index 	= $$refOpt{'ED_REFIDDOC'} . "_" . $$refOpt{'ED_IDLDOC'} .".idx";

	open (my $fh, ">>$index") or die "ERROR: can't open $index : $!";
	my $csv = Text::CSV->new({ binary => 1, sep_char => ';', eol => "\n", quote_space => 0 });

	my @fields; # = map { $$refOpt{$$_[0]} } @edms_cols;
	foreach my $key (@edms_cols){
		push (@fields, $$refOpt{$key});
	}

	$csv->print($fh, \@fields);
	 close($fh);
}


1;