The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
#!/usr/bin/env perl
use strict;
use warnings;
use Math::Prime::Util qw/factor srand urandomm/;
use File::Temp qw/tempfile/;
use Math::BigInt try => 'GMP,Pari';
use Config;
use autodie;
use Text::Diff;
use Time::HiRes qw(gettimeofday tv_interval);
my $maxdigits = 100;
$| = 1;  # fast pipes
srand(87431);
my $num = 1000;
my $semiprimes = 0;

# Note: If you have factor from coreutils 8.20 or later (e.g. you're running
# Fedora), then GNU factor will be very fast and support at least 128-bit
# inputs (~44 digits).  Its growth is not great however, so 25+ digits starts
# getting slow.  The authors wrote on a forum that a future version will
# include a TinyQS, which should make it really rock for medium-size inputs.
#
# On the other hand, if you have the older factor (e.g. you're running
# Ubuntu) then GNU factor uses trial division so will be very painful for
# large numbers.  You'll probably want to turn it off here as it will be
# many thousands of times slower than MPU and Pari.

# A benchmarking note: in this script, getting MPU and Pari results are done
# by calling a function, where getting GNU factor results are done via
# multiple shells to /usr/bin/factor with the inputs as command line
# arguments.  This adds a lot of overhead that has nothing to do with their
# implementation.  For comparison, I've included an option for getting MPU
# factoring via calling the factor.pl script.  Weep at the startup cost.

my $do_gnu = 1;
my $do_pari = 1;
my $use_mpu_factor_script = 0;

if ($do_pari) {
  $do_pari = 0 unless eval { require Math::Pari; Math::Pari->import(); 1; };
}

{ # Test from 2 to 10000
  print "    2 -  1000"; test_array(    2 ..  1000);
  print " 1001 -  5000"; test_array( 1001 ..  5000);
  print " 5001 - 10000"; test_array( 5001 .. 10000);
}

foreach my $digits (5 .. $maxdigits) {
  printf "%5d %2d-digit numbers", $num, $digits;
  my @narray = gendigits($digits, $num);
  test_array(@narray);
  $num = int($num * 0.9) + 1; # reduce as we go
}

sub test_array {
  my @narray = @_;
  my($start, $mpusec, $gnusec, $parisec, $diff);
  my(@mpuarray, @gnuarray, @pariarray);

  print ".";
  $start = [gettimeofday];
  @mpuarray = mpu_factors(@narray);
  $mpusec = tv_interval($start);

  if ($do_gnu) {
    print ".";
    $start = [gettimeofday];
    @gnuarray = gnu_factors(@narray);
    $gnusec = tv_interval($start);
  }

  if ($do_pari) {
    print ".";
    $start = [gettimeofday];
    @pariarray = pari_factors(@narray);
    $parisec = tv_interval($start);
  }

  print ".";
  die "MPU got ", scalar @mpuarray, " factors.  GNU factor got ",
      scalar @gnuarray, "\n" unless !$do_gnu || $#mpuarray == $#gnuarray;
  die "MPU got ", scalar @mpuarray, " factors.  Pari factor got ",
      scalar @pariarray, "\n" unless !$do_pari || $#mpuarray == $#pariarray;
  foreach my $n (@narray) {
    my @mpu  = @{shift @mpuarray};
    die "mpu array is for the wrong n?" unless $n == shift @mpu;
    if ($do_gnu) {
      my @gnu  = @{shift @gnuarray};
      die "gnu array is for the wrong n?" unless $n == shift @gnu;
      $diff = diff \@mpu, \@gnu, { STYLE => 'Table' };
      die "factor($n): MPU/GNU\n$diff\n" if length($diff) > 0;
    }
    if ($do_pari) {
      my @pari = @{shift @pariarray};
      die "pari array is for the wrong n?" unless $n == shift @pari;
      my $diff = diff \@mpu, \@pari, { STYLE => 'Table' };
      die "factor($n): MPU/Pari\n$diff\n" if length($diff) > 0;
    }
  }
  print ".";
  # We should ignore the small digits, since we're comparing direct
  # Perl functions with multiple command line invocations.  It really
  # doesn't make sense until we're over 1ms per number.
  printf "  MPU:%8.4f ms", (($mpusec*1000) / scalar @narray);
  printf("  GNU:%8.4f ms", (($gnusec*1000) / scalar @narray)) if $do_gnu;
  printf("  Pari:%8.4f ms", (($parisec*1000) / scalar @narray)) if $do_pari;
  print "\n";
}

sub gendigits {
  my $digits = shift;
  die "Digits must be > 0" unless $digits > 0;
  my $howmany = shift;
  my ($base, $max);

  if ( 10**$digits < ~0) {
    $base = ($digits == 1) ? 0 : int(10 ** ($digits-1));
    $max = int(10 ** $digits);
    $max = ~0 if $max > ~0;
  } else {
    $base = Math::BigInt->new(10)->bpow($digits-1);
    $max = Math::BigInt->new(10)->bpow($digits) - 1;
  }
  my @nums;
  if ($semiprimes) {
    # This is a lousy way to do it.  We should generate a half-size prime, then
    # generate a prime whose product with the first falls in range.  Or even
    # just two half-size until the product is in range.
    for (1.. $howmany) {
      my $c;
      while (1) {
        $c = $base + urandomm($max-$base);
        my @f = factor($c);
        next if scalar(@f) != 2;
        last if $digits < 8;
        last if $digits < 12 && $f[0] > 1000;
        last if $digits < 16 && $f[0] > 100000;
        last if                 $f[0] > 10000000;
      }
      push @nums, $c;
    }
  } else {
    @nums = map { $base + urandomm($max-$base) } (1 .. $howmany);
  }
  #for (@nums) { print "$_ [",join(" ",factor($_)),"]   " }
  return @nums;
}

sub mpu_factors {
  my @piarray;

  if (!$use_mpu_factor_script) {
    push @piarray, [$_, factor($_)] for @_;
  } else {
    my @ns = @_;
    my $numpercommand = int( (4000-30)/(length($ns[-1])+1) );
    while (@ns) {
      my $cs = join(" ", 'perl -Iblib/lib -Iblib/arch bin/factor.pl', splice(@ns, 0, $numpercommand));
      my $fout = qx{$cs};
      my @flines = split(/\n/, $fout);
      foreach my $fline (@flines) {
        $fline =~ s/^(\d+): //;
        push @piarray, [$1, split(/ /, $fline)];
      }
    }
  }
  @piarray;
}

sub gnu_factors {
  my @ns = @_;
  my @piarray;
  my $numpercommand = int( (4000-30)/(length($ns[-1])+1) );

  while (@ns) {
    my $cs = join(" ", '/usr/bin/factor', splice(@ns, 0, $numpercommand));
    my $fout = qx{$cs};
    my @flines = split(/\n/, $fout);
    foreach my $fline (@flines) {
      $fline =~ s/^(\d+): //;
      push @piarray, [$1, split(/ /, $fline)];
    }
  }
  @piarray;
}

sub pari_factors {
  my @piarray;
  foreach my $n (@_) {
    my @factors;
    my ($pn,$pc) = @{Math::Pari::factorint($n)};
    # Map the Math::Pari objects returned into Math::BigInts, because Pari will
    # throw a hissy fit later when we try to compare them to anything else.
    push @piarray, [ $n, map { (Math::BigInt->new($pn->[$_])) x $pc->[$_] } (0 .. $#$pn) ];
  }
  @piarray;
}