The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
#!/usr/bin/perl 

# This script extracts all the records from an OAI-PMH target
# and stores them in a directory of your choosing.
#
# You will see record identifiers on STDOUT as records are being retrieved.

use strict;
use warnings;
use Net::OAI::Harvester;
use Carp qw(carp);

# Command line: <oai-base-url> <directory>. Both arguments are mandatory.
# Definedness and length are tested (rather than plain truth) so that a
# directory literally named "0" is still accepted.
my ($baseURL, $dir) = @ARGV;
unless (defined $baseURL and length $baseURL
        and defined $dir and length $dir) {
  # Usage goes to STDERR so it never mixes with the identifier list that
  # this script writes to STDOUT.
  print STDERR "usage: oai-dump oai-base-url directory\n";
  print STDERR "       eg. oai-dump http://memory.loc.gov/cgi-bin/oai2_0 loc_data\n";
  exit 1;
}

# create the directory if necessary; give up right away when that fails
# instead of starting a harvest that has nowhere to store its output
unless (-d $dir) {
  mkdir $dir
    or die "oai-dump: cannot create directory '$dir': $!\n";
}

## create a harvester; dumpDir is handed the target directory
## (per the header comment, this is where the records end up)
my $harvester = Net::OAI::Harvester->new(
     baseURL => $baseURL,
     dumpDir => $dir
);

## list all the records in a repository
my $records = $harvester->listAllRecords(
     'metadataPrefix'    => 'oai_dc'
);

## echo each record identifier as it is retrieved
while (my $record = $records->next()) {
  print $record->header->identifier, "\n";
}

## The loop also ends when a request fails, so check the response object
## for an error and exit non-zero instead of reporting success.
if (my $code = $records->errorCode()) {
  my $message = $records->errorString();
  $message = '' unless defined $message;
  die "oai-dump: harvest failed ($code): $message\n";
}