The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
#!/usr/bin/perl

use strict;
use warnings;

# VERSION

use PDF::API2::Basic::PDF::File;

my $file = shift(@ARGV);
unless ($file) {
    print "Usage: $0 <file.pdf>\n\nDisplays information about a PDF file.\n";
    exit;
}

my $pdf = PDF::API2::Basic::PDF::File->open($file) or die "Unable to open $file: $!";
my $command = shift(@ARGV);

unless ($command) {
    print "PDF Version: " . $pdf->{' version'} . "\n";
    print "XRef Table:  " . $pdf->{' xref_position'} . "\n";
    print "Info:        " . _obj_reference($pdf->{'Info'}) . "\n" if $pdf->{'Info'};
    print "Root:        " . _obj_reference($pdf->{'Root'}) . "\n" if $pdf->{'Root'};
    print "\n";
    print "To view an object:\n";
    print "$0 <file.pdf> obj <id> [generation]\n";
    print "\n";
    print "To view a cross-reference dictionary (with optional offset in bytes):\n";
    print "$0 <file.pdf> xref [offset]\n";
    print "\n";
}
elsif ($command eq 'xref') {
    my $location = shift(@ARGV);
    $location = $pdf->{' xref_position'} unless defined $location;
    my $object = $pdf->readxrtr($location);
    print "XRef at $location\n";
    print '--------' . ('-' x length($location)) . "\n";
    _print_obj($object);
}
elsif ($command eq 'obj') {
    my $id = shift(@ARGV);
    die "Missing required object number" unless $id and $id =~ /^[0-9]+$/;
    my $generation = shift(@ARGV) || 0;

    my $location = $pdf->locate_obj($id, $generation);
    my $object = $pdf->read_objnum($id, $generation);
    print "Object $id";
    unless (ref($location)) {
        print " (file position $location)\n";
    }
    else {
        my ($obj_num, $obj_idx) = @$location;
        print " (object stream $obj_num index $obj_idx)\n";
    }
    print '-------' . ('-' x length($id)) . "\n";
    unless ($object) {
        print "[Unable to read object]\n";
    }
    else {
        _print_obj($object);
    }
}

sub _print_obj {
    my $object = shift();
    if ($object->isa('PDF::API2::Basic::PDF::Dict')) {
        print _obj_dictionary($object);
    }
    elsif ($object->isa('PDF::API2::Basic::PDF::Array')) {
        print _obj_array($object) . "\n";
    }
    elsif ($object->isa('PDF::API2::Basic::PDF::Name') or
           $object->isa('PDF::API2::Basic::PDF::Number') or
           $object->isa('PDF::API2::Basic::PDF::String')) {
        if ($object->val() =~ /^[[:print:]]+$/) {
            print $object->val() . "\n";
        }
        else {
            print $object->as_pdf() . "\n";
        }
    }
    elsif ($object->isa('PDF::API2::Basic::PDF::Null')) {
        print "<Null>\n"
    }
    else {
        print "[" . ref($object) . "]\n";
    }

    if ($object->{' stream'}) {
        print "\n";
        print "Stream\n";
        print "------\n";
        eval { $object->read_stream(1) };
        if ($@) {
            print "[Stream could not be read or decoded]\n";
        }
        elsif ($ENV{'FORCE'} or $object->{' stream'} =~ /^[[:print:]\s]*$/) {
            print $object->{' stream'} . "\n";
        }
        else {
            print "[Stream contains non-printable characters.  Set environment FORCE=1 to show the stream anyway.]\n";
        }
    }
}

sub _obj_reference {
    my $object = shift();
    return '<Object ' . $object->{' objnum'} . ($object->{' objgen'} ? ' ' . $object->{' objgen'} : '') . '>';
}

sub _obj_dictionary {
    my $object = shift();
    my $indent = shift() || 0;
    my $data = {};
    foreach my $key (keys %$object) {
        next if $key =~ /^ /;
        if (ref($object->{$key})) {
            if ($object->{$key}->isa('PDF::API2::Basic::PDF::Array')) {
                $data->{$key} = _obj_array($object->{$key}, $indent + 1);
                chomp $data->{$key};
            }
            elsif ($object->{$key}->isa('PDF::API2::Basic::PDF::Dict')) {
                if ($object->{$key}->{' objnum'}) {
                    $data->{$key} = '<Object ' . $object->{$key}->{' objnum'} . ($object->{$key}->{' objgen'} ? ' ' . $object->{$key}->{' objgen'} : '') . '>';
                }
                else {
                    unless (scalar grep { $_ !~ /^ / } keys %{$object->{$key}}) {
                        $data->{$key} = '<Empty Dictionary>';
                    }
                    else {
                        $data->{$key} = "\n" . _obj_dictionary($object->{$key}, $indent + 1);
                        chomp $data->{$key};
                    }
                }
            }
            elsif ($object->{$key}->isa('PDF::API2::Basic::PDF::Name') or
                   $object->{$key}->isa('PDF::API2::Basic::PDF::Number') or
                   $object->{$key}->isa('PDF::API2::Basic::PDF::String')) {
                if ($object->{$key}->val() =~ /^[[:print:]]+$/) {
                    $data->{$key} = $object->{$key}->val();
                }
                else {
                    $data->{$key} = $object->{$key}->as_pdf();
                }
            }
            elsif ($object->{$key}->isa('PDF::API2::Basic::PDF::Null')) {
                $data->{$key} = '<Null>';
            }
            elsif ($object->{$key}->isa('PDF::API2::Basic::PDF::Objind') and $object->{$key}->{' objnum'}) {
                $data->{$key} = '<Object ' . $object->{$key}->{' objnum'} . ($object->{$key}->{' objgen'} ? ' ' . $object->{$key}->{' objgen'} : '') . '>';
            }
            else {
                $data->{$key} = '[' . ref($object->{$key}) . ']';
            }
        }
        else {
            $data->{$key} = $object->{$key};
        }
    }
    my $longest_key = 0;
    foreach my $key (keys %$data) {
        next if $data->{$key} =~ /^\n/;
        $longest_key = length($key) if length($key) > $longest_key;
    }
    $longest_key++;
    my $value = '';
    my $cr = sub {  return 1 if substr($data->{$_[0]}, 0, 1) eq "\n"; return -1; };
    foreach my $key (sort { &$cr($a) <=> &$cr($b) or $a cmp $b } keys %$data) {
        if ($indent) {
            $value .= ' ' x ($indent * 4);
        }
        $value .= sprintf("%-${longest_key}s ", $key . ':') . $data->{$key} . "\n";
    }
    return $value;
}

sub _obj_array {
    my $object = shift();
    my $indent = shift() || 0;

    return '[ ]' unless scalar $object->elements();

    my @elements;
    my $is_complex = 0;
    foreach my $element ($object->elements()) {
        unless (ref($element)) {
            push @elements, $element;
        }
        else {
            if ($element->isa('PDF::API2::Basic::PDF::Array')) {
                push @elements, _obj_array($element, $indent + 1);
            }
            elsif ($element->isa('PDF::API2::Basic::PDF::Dict')) {
                if ($element->{' objnum'}) {
                    push @elements, '<Object ' . $element->{' objnum'} . ($element->{' objgen'} ? ' ' . $element->{' objgen'} : '') . '>';
                }
                else {
                    unless (scalar grep { $_ !~ /^ / } keys %$element) {
                        push @elements, "<Empty Dictionary>";
                    }
                    else {
                        $is_complex = 1;
                        push @elements, "Dictionary: \n" . _obj_dictionary($element, $indent + 1);
                        chomp $elements[-1];
                    }
                }
            }
            elsif ($element->isa('PDF::API2::Basic::PDF::Name') or
                   $element->isa('PDF::API2::Basic::PDF::Number') or
                   $element->isa('PDF::API2::Basic::PDF::String')) {
                if ($element->val() =~ /^[[:print:]]+$/) {
                    push @elements, $element->val();
                }
                else {
                    push @elements, $element->as_pdf();
                }
            }
            elsif ($element->isa('PDF::API2::Basic::PDF::Null')) {
                push @elements, '<Null>';
            }
            elsif ($element->isa('PDF::API2::Basic::PDF::Objind') and $element->{' objnum'}) {
                push @elements, '<Object ' . $element->{' objnum'} . ($element->{' objgen'} ? ' ' . $element->{' objgen'} : '') . '>';
            }
            else {
                push @elements, '[' . ref($element) . ']';
            }
        }
    }
    my $value;
    unless ($is_complex) {
        $value = '[ ' . join(' ', @elements) . ' ]';
    }
    else {
        $value = "\n";
        foreach my $element (@elements) {
            $value .= ' ' x ($indent * 4) if $indent;
            $value .= '- ' . $element . "\n";
        }
    }
    return $value;
}