The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
#!/usr/bin/perl

=head1 NAME

mogfiledebug -- Dump gobs of information about a FID

=head1 SYNOPSIS

    $ mogfiledebug --trackers=host --domain=foo --key=bar
    $ mogfiledebug --trackers=host --fid=1234

=head1 DESCRIPTION

Utility for troubleshooting problemic files in a mogilefs cluster. Also useful
for verification or testing new setups.

Finds as much information about a file as it can. All of the paths, any queues
it might be sitting in, etc. Will then test all of the paths, MD5 hash their
contents, and check the file lengths. If you see errors about a FID in
mogilefsd's logs plugging it through mogfiledebug should illuminate most of
the potential issues.

This is also useful information for posting to the mailing list, along with
the error you had.

=head1 OPTIONS

=over

=item --trackers=host1:7001,host2:7001

Use these MogileFS trackers to negotiate with.

=item --domain=<domain>

Set the MogileFS domain to use.

=item --key="<key>"

The key to inspect. Can be an arbitrary string.

=item --fid=<fid>

A numeric fid to inspect. Provide this as an alternative to a domain/key
combination.

=back

=head1 AUTHOR

Dormando E<lt>L<dormando@rydia.net>E<gt>

=head1 BUGS

None known. Could use more helpful prints, or a longer troubleshooting manual.

=head1 LICENSE

Licensed for use and redistribution under the same terms as Perl itself.

=cut

use strict;
use warnings;

use lib './lib';
use MogileFS::Utils;
use Digest::MD5;
use LWP::UserAgent;

my $util = MogileFS::Utils->new;
my $usage = qq{--trackers=host --domain=foo --key='/hello.jpg' 
If FID is known, but domain/key are not known:
--trackers=host --domain=anything --fid=123456};
# FIXME: add "nofetch" mode that just prints paths?
my $c = $util->getopts($usage, qw/key=s fid=i/);

my $mogc = $util->client;

my $arg = $c->{fid} ? 'fid' : 'key';
my $details = $mogc->file_debug($arg => $c->{$arg});
if ($mogc->errcode) {
    die "Error fetching fid info: " . $mogc->errstr;
}

my %parts = ();
my @paths = grep { $_ =~ m/^devpath_/ } keys %$details;
while (my ($k, $v) = each %$details) {
    next if $k =~ m/^devpath_/;
    if ($k =~ s/^(\w+)_//) {
        $parts{$1}->{$k} = $v;
    }
}

# If no paths, print something about that.
if (@paths) {
    my @results;
    # For each actual path, fetch and calculate the MD5SUM.
    print "Fetching and summing paths...\n";
    for my $key (@paths) {
        my $path = $details->{$key};
        push(@results, fetch_path($path));
    }
    my $hash; # detect if hashes don't match
    my $len = $parts{fid}->{length};
    print "No length, cannot verify content length" unless defined $len;
    # No I don't have a good excuse for why this isn't one loop.
    for my $res (@results) {
        print "\nResults for path: ", $res->{path}, "\n";
        if ($res->{res} =~ /404/) {
            print " - ERROR: File copy is missing: ", $res->{res}, "\n";
            next;
        }
        $hash = $res->{hash} unless $hash;
        if ($hash ne $res->{hash}) {
            print " - ERROR: Hash does not match first path!\n";
        }
        if (defined $len && defined $res->{length} && $len != $res->{length}) {
            print " - ERROR: Length does not match file row!\n";
        }
        print " - MD5 Hash: ", $res->{hash}, "\n";
        print " - Length: ", $res->{length}, "\n" if defined $res->{length};
        print " - HTTP result: ", $res->{res}, "\n";
    }
} else {
    print "No valid-ish paths found\n";
}

# print info from all of the queues. Raw is fine? failcount/etc.
print "\nTempfile and/or queue rows...\n";
my $found = 0;
for my $type (qw/tempfile replqueue delqueue rebqueue fsckqueue/) {
    my $part = $parts{$type};
    next unless (defined $part);
    $found++;
    printf("- %12s\n", $type);
    while (my ($k, $v) = each %$part) {
        printf("  %20s: %20s\n", $k, $v);
    }
}
print "none.\n" unless $found;

# Print rest of file info like file_info
if (my $fid = $parts{fid}) {
    print "\n- File Row:\n";
    for my $item (sort keys %$fid) {
        printf("  %8s: %20s\n", $item, $fid->{$item});
    }
} else {
    print qq{- ERROR: No file row was found!
File may have been deleted or never closed.
See above for any matching rows from tempfile or delqueue.
};
}

if (my $devids = $details->{devids}) {
    print "\n- Raw devids: ", $devids, "\n";
}

if (my $hash = $details->{checksum}) {
    print "\n- Stored checksum: ", $hash, "\n";
}

sub fetch_path {
    my $path = shift;
    my $ua   = LWP::UserAgent->new;
    my $ctx  = Digest::MD5->new;
    $ua->timeout(10);
    my %toret = (length => 0);

    my $sum_up = sub {
        $toret{length} += length($_[0]);
        $ctx->add($_[0]);
    };
    my $res = $ua->get($path, ':content_cb' => $sum_up,
        ':read_size_hint' => 32768);

    $toret{hash}  = $ctx->hexdigest;
    $toret{res}   = $res->status_line;
    $toret{path}  = $path;
    return \%toret;
}