# The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
#!/usr/bin/perl -w
#
# MkWinwordIndex
#
# Creates an HTML index for MS Office documents
#
# (C) 1997 by Bernard Weiler

#
# Config:
#
   # Location of the external helper programs "lhalw" and "ldat"
   # (both are invoked as "$Bin/<tool>"):
   my $Bin = '/usr/local/bin/';

   # File name of the HTML index that is written into the indexed directory:
   my $MkWinwordIndex = 'MkWinwordIndex.html';

# --------------------
# Name:           Bernard Weiler
# Org.:           OeN TR HW A, Mch-H, Siemens AG
# Tel:            office 722-28374, private 089-7901737
# email:          pmp@egnetz.uebemc.siemens.de
#                 or Bernard.Weiler@oen.siemens.de
# Snailmail:      Siemens AG, OeN TR HW A, Mch-H, Postfach 700071, D-81359
# Munich
# Intranet:       http://susi.oen.siemens.de/~pmp
# PGP:            the public key may be downloaded from
# http://trust.sbs.de/html/med.htm
#                 or from pgp-public-keys@uni-paderborn.de
# 
# --------------36C25AB72FCE

use strict;
use 5.004;

# ----------------------------------------------------------------------
# Main script.
#
# Takes one argument, the directory to index, and writes
# "$Dir/$MkWinwordIndex": an HTML table with one row per directory entry.
# For *.doc / *.xls / *.ppt files the metadata printed by "$Bin/ldat" is
# shown; for *.doc files the first lines printed by "$Bin/lhalw" are shown
# as well.  Sub-directories link to their own index file.
#
# The code deliberately sticks to constructs available in Perl 5.004
# (two-argument open, bareword handles) because the file declares
# "use 5.004".
# ----------------------------------------------------------------------

# Escape the characters that are special in HTML text and attribute values.
sub html_escape {
  my ($text) = @_;
  $text =~ s/&/&amp;/g;
  $text =~ s/</&lt;/g;
  $text =~ s/>/&gt;/g;
  $text =~ s/"/&quot;/g;
  return $text;
}

# Percent-encode a single path segment so it can be used inside an HREF
# (file names may contain blanks, '#', '?', '%' ...).
sub url_escape {
  my ($segment) = @_;
  $segment =~ s/([^A-Za-z0-9_.~-])/sprintf("%%%02X", ord($1))/ge;
  return $segment;
}

# Quote a string so that /bin/sh sees it as exactly one word.  Needed
# because the helper tools are started through the shell and file names
# may contain single quotes.
sub shell_quote {
  my ($word) = @_;
  $word =~ s/'/'\\''/g;
  return "'$word'";
}

# Print one table row to INDEX: one <TD> per column name in @$columns,
# taking the cell contents (already HTML) from %$meta.
sub print_row {
  my ($columns, $meta) = @_;
  print INDEX "<TR>";
  foreach my $column (@$columns) { print INDEX "<TD>$meta->{$column}</TD>\n" }
  print INDEX "</TR>\n";
}

my $Dir = shift;

# Only a real help switch triggers the usage text; the old unanchored
# /-h/ test also fired for directories such as "my-home".
if ((not defined $Dir) or ($Dir =~ /^--?h(?:elp)?$/i)) {
  print "
    $0 - Directory Listing of Microsoft office Files
    Usage: 
      $0 <PathToDir>
      $0 [-h|-H|-help]
    Purpose: 
      generates in <PathToDir> a file $MkWinwordIndex containing metainformations
      of all Microsoft office files in <PathToDir>.
      It supports recursive index-trees.
    Example: 
      find . -type d -exec $0 '{}' \\;
    Author: 
      B.Weiler, TR HW A, 8.12.97
  ";
  exit;
}
chomp $Dir;

# Column names of the generated table.  Title .. "Last saved" are also the
# strings searched for in the output of ldat.
my @MetaList = (qw(Filename Title Authress Application Created),
                "Last saved", "MetaInfos", "DocumentContent");

# Read the directory up front: hidden entries and the index file itself
# are skipped, the rest is sorted so that the output is reproducible.
opendir(DH, $Dir) or die "$0: cannot read directory '$Dir': $!\n";
my @Entries = grep { !/^\./ and $_ ne $MkWinwordIndex } readdir(DH);
closedir(DH);
@Entries = sort @Entries;

open(INDEX, ">$Dir/$MkWinwordIndex")
  or die "$0: cannot write '$Dir/$MkWinwordIndex': $!\n";

print INDEX "<H1 ALIGN=\"center\">Directory Listing of Microsoft office Files</H1>\n";
print INDEX "Directory: " . html_escape($Dir) . "<P>\n";
print INDEX "<TABLE BORDER=1><TR>";
foreach my $column (@MetaList) { print INDEX "<TH>$column</TH>\n" }
print INDEX "</TR>\n";

foreach my $File (@Entries) {
  my $path      = "$Dir/$File";
  my $name_html = html_escape($File);
  # The index lives inside $Dir, so links must be relative to $Dir itself
  # (not "./$Dir/$File", which only resolved when $Dir was ".").
  my $href      = html_escape("./" . url_escape($File));
  my $line;

  my %Meta;
  foreach my $column (@MetaList) { $Meta{$column} = '-' }

  if ($File =~ /\.(doc|xls|ppt)$/i) {
    $Meta{Filename} = "<A HREF=\"$href\">$name_html</A>";

    # Collect the metadata lines printed by ldat.
    my @info_lines;
    if (open(PIPE, "$Bin/ldat " . shell_quote($path) . " |")) {
      while (defined($line = <PIPE>)) {
        chomp $line;
        # NOTE(review): as before, every column name is tried, so a line
        # containing "Filename" replaces the link -- confirm this is wanted.
        foreach my $column (@MetaList) {
          next unless index($line, $column) >= 0;
          my $value = $line;
          $value =~ s|^.*?:||;            # drop the "Label:" prefix
          $Meta{$column} = html_escape($value);
        }
        push(@info_lines, html_escape($line));
        $Meta{Filename} .= "<BR><FONT COLOR=\"red\"><B>Error found in Document!</B></FONT>"
          if $line =~ /Error/;
      }
      close(PIPE);
    }
    else {
      warn "$0: cannot run $Bin/ldat on '$path': $!\n";
    }
    $Meta{MetaInfos} = "<PRE>" . join('<BR>', @info_lines) . "</PRE>";

    # Word documents additionally get a preview of at most 10 lines.
    # Anchored and case-insensitive so that "FOO.DOC" is handled and
    # "x.doc.xls" is not.
    if ($File =~ /\.doc$/i) {
      my $content = "<PRE>";
      if (open(PIPE, "$Bin/lhalw -FN -c 50 " . shell_quote($path) . " |")) {
        my $shown = 0;
        while (defined($line = <PIPE>)) {
          last if $shown == 10;
          $shown++;
          chomp $line;
          $content .= html_escape($line) . " (...truncated)\n";
        }
        close(PIPE);
      }
      else {
        warn "$0: cannot run $Bin/lhalw on '$path': $!\n";
      }
      $content .= "\n(...truncated)</PRE>";
      $Meta{DocumentContent} = $content;
    }
  }
  elsif (-d $path) {
    # Test the entry inside $Dir, not relative to the current directory.
    my $index_href = html_escape("./" . url_escape($File) . "/" . url_escape($MkWinwordIndex));
    $Meta{Filename}    = "<A HREF=\"$index_href\">$name_html</A>";
    $Meta{Application} = "Is a directory";
  }
  else {
    $Meta{Filename}    = "<A HREF=\"$href\">$name_html</A>";
    $Meta{Application} = "not a MS-office application";
  }

  print_row(\@MetaList, \%Meta);
}

print INDEX "</TABLE>";
print INDEX "Generated by " . html_escape($0) . "\n";
# Buffered write errors only show up at close time.
close(INDEX) or die "$0: error writing '$Dir/$MkWinwordIndex': $!\n";