The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
package Filesys::DiskUsage;

use warnings;
use strict;

use File::Basename;

=head1 NAME

Filesys::DiskUsage - Estimate file space usage (similar to `du`)

=cut

require Exporter;

our @ISA = qw(Exporter);

our %EXPORT_TAGS = (
        'all' => [ qw(
                        du
                ) ],
);

our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } );

our @EXPORT = qw(
);

our $VERSION = '0.05';

=head1 SYNOPSIS

  use Filesys::DiskUsage qw/du/;

  # basic
  $total = du(qw/file1 file2 directory1/);

or

  # no recursion
  $total = du( { recursive => 0 } , <*> );

or

  # max-depth is 1
  $total = du( { 'max-depth' => 1 } , <*> );

or

  # get an array
  @sizes = du( @files );

or

  # get a hash
  %sizes = du( { 'make-hash' => 1 }, @files_and_directories );

=head1 FUNCTIONS

=head2 du

Estimate file space usage.

Get the size of files:

  $total = du(qw/file1 file2/);

Get the size of directories:

  $total = du(qw/file1 directory1/);

=head3 OPTIONS

=over 6

=item dereference

Follow symbolic links. Default is 0. Overrides C<symlink-size>.

Get the size of a directory, recursively, following symbolic links:

  $total = du( { dereference => 1 } , $dir );

=item exclude => PATTERN

Exclude files that match PATTERN.

Get the size of every file except for dot files:

  $total = du( { exclude => qr/^\./ } , @files ); 

=item human-readable

Return sizes in human readable format (e.g., 1K 234M 2G)

  $total = du ( { 'human-readable' => 1 } , @files );

=item Human-readable

Return sizes in human readable format, but use powers of 1000 instead
of 1024.

  $total = du ( { 'Human-readable' => 1 } , @files );

=item make-hash

Return the results in a hash.

  %sizes = du( { 'make-hash' => 1 } , @files );

=item max-depth

Sets the max-depth for recursion. A negative number means there is no
max-depth. Default is -1.

Get the size of every file in the directory and immediate
subdirectories:

  $total = du( { 'max-depth' => 1 } , <*> );

=item recursive

Sets whether directories are to be explored or not. Set to 0 if you
don't want recursion. Default is 1. Overrides C<max-depth>.

Get the size of every file in the directory, but not directories:

  $total = du( { recursive => 0 } , <*> );

=item sector-size => NUMBER

All file sizes are rounded up to a multiple of this number.  Any file
that is not an exact multiple of this size will be treated as the next
multiple of this number as they would in a sector-based file system.  Common
values will be 512 or 1024.  Default is 1 (no sectors).

  $total = du( { sector-size => 1024 }, <*> );

=item show-warnings => 1 | 0

Shows warnings when trying to open a directory that isn't readable.

  $total = du( { 'show-warnings' => 0 }, <*> );

1 by default.

=item symlink-size => NUMBER

Symlinks are assumed to be this size.  Without this option, symlinks are
ignored unless dereferenced.  Setting this option to 0 will result in the
files showing up in the hash, if C<make-hash> is set, with a size of 0.
Setting this option to any other number will treat the size of the symlink
as this number.  This option is ignored if the C<dereference> option is
set.

  $total = du( { symlink-size => 1024, sector-size => 1024 }, <*> );

=item truncate-readable => NUMBER

Human readable formats decimal places are truncated by the value of
this option. A negative number means the result won't be truncated at
all. Default if 2.

Get the size of a file in human readable format with three decimal
places:

  $size = du( { 'human-readable' => 1 , 'truncate-readable' => 3 } , $file);

=back

=cut

sub du {
  # options
  my %config = (
    'dereference'       => 0,
    'exclude'           => undef,
    'human-readable'    => 0,
    'Human-readable'    => 0,
    'make-hash'         => 0,
    'max-depth'         => -1,
    'recursive'         => 1,
    'sector-size'       => 1,
    'show-warnings'     => 1,
    'symlink-size'      => undef,
    'truncate-readable' => 2,
  );
  if (ref($_[0]) eq 'HASH') {%config = (%config, %{+shift})}
  $config{human} = $config{'human-readable'} || $config{'Human-readable'};

  my %sizes;

  # calculate sizes
  for (@_) {
    if (defined $config{exclude} and -f || -d) {
      my $filename = basename($_);
      next if $filename =~ /$config{exclude}/;
    }
    if (-l) { # is symbolic link
      if ($config{'dereference'}) { # we want to follow it
        $sizes{$_} = du( { 'recursive'   => $config{'recursive'},
                           'exclude'     => $config{'exclude'},
                           'sector-size' => $config{'sector-size'},
                         }, readlink($_));
      }
      else {
        $sizes{$_} = $config{'symlink-size'} if defined $config{'symlink-size'};
        next;
      }
    }
    elsif (-f) { # is a file
      $sizes{$_}  = $config{'sector-size'} - 1 + -s;
      $sizes{$_} -= $sizes{$_} % $config{'sector-size'};
    }
    elsif (-d) { # is a directory
      if ($config{recursive} && $config{'max-depth'}) {

        if (opendir(DIR, $_)) {
          my $dir = $_;
          my @files = readdir DIR;
          closedir(DIR);

          $sizes{$_} += du( { 'recursive'     => $config{'recursive'},
                              'max-depth'     => $config{'max-depth'} -1,
                              'exclude'       => $config{'exclude'},
                              'sector-size'   => $config{'sector-size'},
                              'show-warnings' => $config{'show-warnings'},
                            }, map {"$dir/$_"} grep {! /^\.\.?$/} @files);
        }
        elsif ( $config{'show-warnings'} ) {
            # if the user requests to be notified of non openable directories, notify the user
            warn "could not open $_ ($!)\n";
        }

      }
    }
  }

  # return sizes
  if ( $config{'make-hash'} ) {
    for (keys %sizes) {$sizes{$_} = _convert($sizes{$_}, %config)}

    return wantarray ? %sizes : \%sizes;
  }
  else {
    if (wantarray) {
      return map {_convert($_, %config)} @sizes{@_};
    }
    else {
      my $total = 0;
      for (values %sizes) {$total += $_}

      return _convert($total, %config);
    }
  }

}

# convert size to human readable format
sub _convert {
  defined (my $size = shift) || return undef;
  my $config = {@_};
  $config->{human} || return $size;
  my $block = $config->{'Human-readable'} ? 1000 : 1024;
  my @args = qw/B K M G/;

  while (@args && $size > $block) {
    shift @args;
    $size /= $block;
  }

  if ($config->{'truncate-readable'} > 0) {
    $size = sprintf("%.$config->{'truncate-readable'}f",$size);
  }

  "$size$args[0]";
}

=head1 AUTHOR

Jose Castro, C<< <cog@cpan.org> >>

=head1 COPYRIGHT & LICENSE

Copyright 2004 Jose Castro, All Rights Reserved.

This program is free software; you can redistribute it and/or modify it
under the same terms as Perl itself.

=cut

1; # End of Filesys::DiskUsage