The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
#!/usr/bin/perl

=head1 NAME

dpkg-scanpmpackages - creates PerlPackages index from .deb files

=head1 SYNOPSIS

	dpkg-scanpmpackages [repository_folder]
	
		--blacklist=comma,separated,package,names
			optional - packages that should not be indexed
		--pool-url=http://ftp.cz.debian.org/debian/
		    optional - an url to fetch missing .deb files from
		--cleanup-deb
		    optional - when set will remove .deb file after it was processed

=head1 DESCRIPTION

Finds all F<Packages.bz2> and for all F<*.deb> files listed there indexes
the Perl modules files F<*.pm> creating F<PerlPackages.bz2> file in the same
folder.

=cut


use strict;
use warnings;

use Getopt::Long;
use Pod::Usage;
use File::Find::Rule;
use File::Basename 'basename', 'dirname';
use Module::Build::ModuleInfo;
use File::Temp qw/ tempdir /;
use File::Path 'remove_tree';
use IO::Any 0.04;
use Parse::Deb::Control 0.03;
use List::MoreUtils 'any', 'firstval';
use IO::Uncompress::Bunzip2 qw(bunzip2 $Bunzip2Error) ;
use Carp 'croak';
use LWP::UserAgent;

exit main();

sub main {
	my $help;
	my $use_old_index;
	my $blacklisted_pkgs_arg = 'r-cran-gdata';
	my $pool_url;
	my $cleanup_deb;
	GetOptions(
		'help|h'      => \$help,
		'blacklist=s' => \$blacklisted_pkgs_arg,
		'pool-url=s'  => \$pool_url,
		'cleanup-deb' => \$cleanup_deb,
	) or pod2usage;
	pod2usage if $help;
	
	my $mirror_location = shift @ARGV || '.';
	pod2usage if not $mirror_location or not -d $mirror_location;
	#$mirror_location = File::Spec->rel2abs( $mirror_location );
	
	my @packages_files = 
		File::Find::Rule
		->file()
		->name( 'Packages.bz2' )
		->in( $mirror_location )
	;
	
	my @blacklisted_pkgs = split(/\s*,\s*/, $blacklisted_pkgs_arg);

	# loop through all Packages.bz2 that were found
	foreach my $packages_file (@packages_files) {
		print STDERR 'processing ', $packages_file, "\n";

		my $distribution = 'unstable';
		my $component    = 'local';

		my ($release) = eval { Parse::Deb::Control->new([dirname($packages_file), 'Release'])->get_paras('Archive') };
		if ($release) {
			$distribution = _trim($release->{'Archive'})   || $distribution;
			$component    = _trim($release->{'Component'}) || $component;
		}
		
		# read the Packages.bz2 content
		my $packages_dir = dirname($packages_file);
		my $packages_bz_content   = IO::Any->slurp($packages_file);
		my $packages_file_content;
		bunzip2 \$packages_bz_content => \$packages_file_content or die "bunzip2 failed: $Bunzip2Error\n";

		# extract list of all .deb filenames
		my %deb_to_src;
		my $packages = Parse::Deb::Control->new($packages_file_content);
		my @deb_files =
			grep {
				die $_.' from '.$packages_file.' does not exists'
					if ((! -f File::Spec->catfile($mirror_location, $_)) and !$pool_url);
				1;
			}
			map { _trim($_->{'para'}->{'Filename'}) }
			map {
				$deb_to_src{_trim($_->{'para'}->{'Filename'})} = _trim($_->{'para'}->{'Source'})
					if $_->{'para'}->{'Source'};
				$_;
			}
			grep { not _trim($_->{'para'}->{'Package'}) ~~ \@blacklisted_pkgs }
			$packages->get_keys('Filename')
		;
				
		my %provides;
		my %deb_filenames;

		# read the already processed packages
		my $perl_packages_bz = File::Spec->catfile($packages_dir, 'PerlPackages.bz2');
		if (-f $perl_packages_bz) {
			my $perl_packages_bz_content = IO::Any->slurp($perl_packages_bz);
			my $perl_packages_file_content;
			bunzip2 \$perl_packages_bz_content => \$perl_packages_file_content or die "bunzip2 failed: $Bunzip2Error\n";
			my $idx = Parse::Deb::Control->new($perl_packages_file_content);
			foreach my $entry ($idx->get_keys('Filename')) {
				my $filename = ${$entry->{'value'}};
				$filename =~ s/\s*$//;
				$filename =~ s/^\s//;
				my $para  = '';
				$entry->{'para'}->{'Distribution'} ||= ' '.$distribution."\n";
				$entry->{'para'}->{'Component'}    ||= ' '.$component."\n";
				foreach my $key (qw(Package Architecture Filename Version Source Distribution Component Perl-Modules)) {
					$para .= $key.':'.$entry->{'para'}->{$key}
						if $entry->{'para'}->{$key};
				}
				
				$deb_filenames{basename($filename, '.deb')} =  $para
					if any { $_ eq $filename } @deb_files;
			}
		}		

		# process deb files
		foreach my $deb_file (@deb_files) {
			my $deb_name = basename($deb_file, '.deb');

			$provides{$deb_name} = undef;
			
			# skip already processed
			next if exists $deb_filenames{$deb_name};

			$deb_filenames{$deb_name} = $deb_file;
			$provides{$deb_name}      = {};
			my $tmp_dir = tempdir();
			
			my $deb_full_filename = File::Spec->catfile($mirror_location, $deb_file);
			
			# fetch the deb file if not in the mirror
			if (not -f $deb_full_filename) {
				die 'no mirror url set and '.$deb_full_filename.' not found'
					if not $pool_url;
				
				my $deb_full_path = dirname($deb_full_filename);
				system('mkdir', '-p', $deb_full_path)
					if not -d $deb_full_path;
				
				fetch($pool_url.$deb_file, $deb_full_filename);
			}
			
			# extract .deb
			system(
				'dpkg',
				'-x',
				$deb_full_filename,
				$tmp_dir,
			);
			
			# get list of .pm files
			my @pm_files = File::Find::Rule
				->file()
				->name( '*.pm' )
				->in( $tmp_dir )
			;
			
			# FIXME remove .pm files that are not in standard Perl @INC folders
			#       this could be done probably based on a --perl-folders-only switch
			
			# find all package names from pm_files
			foreach my $pm_file (@pm_files) {
				# add version 0 based on filename, will be set properly later if found
				if (my $inc_prefix = firstval { index($pm_file, $tmp_dir.$_) == 0 } @INC) {
					$inc_prefix = $tmp_dir.$inc_prefix;
					my $package = substr($pm_file, length($inc_prefix)+1, -3);
					$package =~ s{/}{::}xmsg;
					$provides{$deb_name}->{$package} = 0;
				}

				# get module info
				my $info = eval { Module::Build::ModuleInfo->new_from_file($pm_file) };
				warn 'failed to get module info of "'.$pm_file.'" - "'.$@.'"' if $@;
				next if not $info;
				
				#print 'processing ', $pm_file, "\n";
				
				$pm_file =~ s{^$tmp_dir.(.+)$}{$1};
				
				foreach my $package (keys %{$info->{'versions'}}) {
					next if $package eq 'main';
					
					# skip inside packages
					my $package_file = $package.'.pm';
					$package_file =~ s{::}{/}xmsg;
					next if substr($pm_file,0-length($package_file)) ne $package_file;
					
					# set version to undef 
					my $version = (
						$info->{'versions'}->{$package}
						? $info->{'versions'}->{$package}->stringify
						: 0
					);
					
					$provides{$deb_name}->{$package} = $version;
				}
			}
			
			remove_tree($tmp_dir);
			unlink($deb_full_filename)
				if $cleanup_deb;	
		}

		# write PerlPackages.tmp
		my $perl_packages_fh = IO::Any->write([$packages_dir, 'PerlPackages.tmp']);
		foreach my $deb_name (sort keys %provides) {
			die 'wrong package name - '.$deb_name
				if $deb_name !~ m/^([^_]+) _ ([^_]+) _ ([^_]+) $/xms;
			my ($package, $version, $arch) = ($1, $2, $3);
			
			if ($provides{$deb_name}) {
				print $perl_packages_fh 'Package: ', $package, "\n";
				
				print
					$perl_packages_fh
					'Architecture: ', $arch, "\n",
					'Filename: ', $deb_filenames{$deb_name}, "\n",
					'Version: ', $version, "\n",
					'Distribution: ', $distribution, "\n",
					'Component: ', $component, "\n",
					(
						$deb_to_src{$deb_filenames{$deb_name}}
						? ('Source: ', $deb_to_src{$deb_filenames{$deb_name}}, "\n")
						: ()
					),
				;
				
				my $perl_modules_count = scalar keys %{$provides{$deb_name}};
				if ($perl_modules_count) {
					print $perl_packages_fh
						'Perl-Modules: ',
						($perl_modules_count > 1 ? "\n " : ''),
						(
							join(
								"\n ", map {
									$_.' ('.$provides{$deb_name}->{$_}.')'
								} keys %{$provides{$deb_name}}
							)
						), "\n"
					;
				}
			}
			else {
				print $perl_packages_fh $deb_filenames{$deb_name};
			}
			print $perl_packages_fh "\n";
		}

		# create the PerlPackages.bz2
		my $perl_packages     = File::Spec->catfile($packages_dir, 'PerlPackages');
		my $perl_packages_tmp = File::Spec->catfile($packages_dir, 'PerlPackages.tmp');
		unlink $perl_packages_tmp.'.bz2'
			if -f $perl_packages_tmp.'.bz2';
		system('bzip2', '-9', $perl_packages_tmp) and die $!;
		rename($perl_packages_tmp.'.bz2', $perl_packages.'.bz2') or die $!;
	}

	return 0;
}

sub fetch {
	my $url      = shift;
	my $filename = shift;
	
	my $ua = LWP::UserAgent->new;
	$ua->timeout(10);
	$ua->env_proxy;
	 
	my $response = $ua->get($url);
	
	die 'failed to fetch '.$url.' - '.$response->status_line
		if (not $response->is_success);
	
	IO::Any->spew([ $filename ], $response->decoded_content);
	
	return;
}

sub _trim {
	my $text = shift;
	croak 'too much argauments' if @_;
	
	$text =~ s/^\s+//xms;
	$text =~ s/\s+$//xms;
	
	return $text;
}

__END__

=head1 AUTHOR

Jozef Kutej

=head1 COPYRIGHT

This program is free software; you can redistribute it and/or
modify it under the same terms as Perl itself.

=cut