The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
# -*- perl -*-
#
# w3pdfuri.pm -- extract URIs from an Adobe Acrobat PDF file.
#     version 0.1
# Fully qualified on purpose: this statement comes before the
# "package w3pdfuri;" line, so a bare $VERSION would land in package main
# and the module itself would have no version.
$w3pdfuri::VERSION = 0.1;
#
# Assumptions:
# - File starts with %PDF
# - All URLs match this RE: ^\/URI\s+\(([^)]*)\)\s*$  (found by inspection of
#   one (1) PDF file: the reader.pdf in the Acrobat distribution).
# - pdf files are potentially very large, so we read it from disk
#   record by record.
# - Record separator is ^M (carriage return, "\r")
#
# History:
# - 19/02/97 janl: Version 0.1, seems to work with PDF-1.2
#

package w3pdfuri;

use strict;

# list($file) -- return the URIs found in the PDF file $file.
#
# The file is read record by record, using "\r" as the record separator,
# so the whole file is never held in memory at once.  The first record
# must start with "%PDF-".  Every following record that matches
# ^/URI\s+\(([^)]*)\)\s*$ contributes the text between the parentheses.
#
# Returns: the URIs in the order they appear in the file (possibly none).
# If the file cannot be opened, or its first record does not start with
# "%PDF-" (this includes an empty file), a warning is emitted and a bare
# "return" is done (empty list / undef).
sub list ($) {
  my ($file) = @_;

  my @urls = ();
  local $/ = "\r";

  # Three-argument open with a lexical handle: the file name is used
  # verbatim (two-argument open trims whitespace and gives names such as
  # "-" or "&STDIN" a special meaning), and no global handle is shared
  # with other code.
  my $pdf;
  unless (open($pdf, '<', $file)) {
    warn "Could not open $file for input: $!\n";
    return;
  }

  # PDF is binary data; read the bytes untranslated on every platform.
  binmode($pdf);

  # Lexical variables are used for the records so that the caller's $_
  # is left untouched.
  my $header = <$pdf>;

  # An empty file yields an undefined first record; treat it as "not PDF"
  # instead of matching against undef.
  unless (defined($header) && $header =~ /^%PDF-/) {
    warn "$file is not a PDF file.\n";
    close($pdf);
    return;
  }

  while (defined(my $record = <$pdf>)) {
    chomp($record);    # removes the trailing "\r" (the current $/)
    push(@urls, $1) if ($record =~ m~^/URI\s+\(([^)]*)\)\s*$~);
  }

  close($pdf);

  return @urls;
}

1;

__END__

# Test code
print "reader.pdf: ",
  join(",",list("/local/lib/acrobat/Reader/help/reader.pdf")),"\n";
print "Acrobat.pdf: ",
  join(",",list("/local/lib/acrobat/Reader/Acrobat.pdf")),"\n";
print "License.pdf: ",
  join(",",list("/local/lib/acrobat/Reader/License.pdf")),"\n";
print "MapTypes.pdf: ",
  join(",",list("/local/lib/acrobat/Reader/MapTypes.pdf")),"\n";
list("/etc/services");