The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.

NAME

File::Pairtree - routines to manage pairtrees

SYNOPSIS

 use File::Pairtree;           # imports routines into a Perl script

 id2ppath($id);                # returns pairpath corresponding to $id
 id2ppath($id, $separator);    # if you want an alternate separator char

 ppath2id($path);              # returns id corresponding to $path
 ppath2id($path, $separator);  # if you want an alternate separator char

 pt_budstr();
 pt_mkid();
 pt_mktree();
 pt_rmid();
 pt_lsid();

DESCRIPTION

This is very brief documentation for the Pairtree Perl module.

COPYRIGHT AND LICENSE

Copyright 2008-2011 UC Regents. Open source BSD license.

#use File::Find; # $File::Find::prune = 1

# XXX add to spec: two ways that a pairpath ends: 1) the form of the # ppath (ie, ends in a morty) and 2) you run "aground" smack into # a "longy" ("thingy") or a file

# xxx other stats to gather: total dir count, total count of all things # that aren't either reg files or dirs; plus max and averages for all # things like depth of ppaths (ids), depth of objects, sizes of objects, # fanout; same numbers for "pairtree.*" branches

my ($pdname, $tpname, $wpname); my $symlinks_followed = 1; my ($dev, $ino, $mode, $nlink, $uid, $gid, $rdev, $sze); my %curobj = ( 'ppath' => '', 'encaperr' => 0, 'octets' => 0, 'streams' => 0, );

sub pt_newobj { my( $ppath, $encaperr, $octets, $streams )=@_;

        # warning: ugly code ahead
        if ($curobj{'ppath'}) {         # print record of previous obj
                $_ = ppath2id($curobj{'ppath'});
                s/^/$$gr_opt{prefix}/;          # uses global set in lstree()
                $$gr_opt{long} and
                        $gr_opt->{om}->elem('node',
                                join("  ", $_, $curobj{'ppath'},
                                "$curobj{'octets'}.$curobj{'streams'}")), 1
                or
                        $gr_opt->{om}->elem('node', $_), 1
                ;
                $curobj{'ppath'} eq $ppath and
                        print "error: corrupted pairtree at pairpath ",
                                "$ppath/: split end $homily\n";
                # xxx use om?
        }
        # xxx strange
        die "pt_newobj: all args must be defined"
                unless (defined($ppath) && defined($encaperr)
                        && defined($octets) && defined($streams));
        $curobj{'ppath'} = $ppath;
        $curobj{'encaperr'} = $encaperr;
        $curobj{'octets'} = $octets;
        $curobj{'streams'} = $streams;
}

sub pt_visit_node { # receives no args

        $pdname = $File::Find::dir;             # current parent directory name
        $tpname = $_;                           # current filename in that dir
        $_ = $wpname = $File::Find::name;       # whole pathname to file

        # We always need lstat() info on the current node XXX why?
        # xxx tells us all, but if following symlinks the lstat is done
        # ... by find:  use (-X _), but of the nifty facts below we
        # still need to harvest the size ($sze) by hand.
        #
        ($dev, $ino, $mode, $nlink, $uid, $gid, $rdev, $sze) = lstat($tpname)
                unless ($symlinks_followed and ($sze = -s _));

        #print "NEXT: $pdname $_ $wpname\n";

        # If we follow symlinks (usual), we have to expect the -l type,
        # which hides the type of the link target (what we really want).
        #
        if (! $Win and -l _) {
                $symlinkcount++;
                print "XXXX SYMLINK $_\n";
                # yyy presumably this branch never happens when
                #     _not_ following links?
                ($dev, $ino, $mode, $nlink, $uid, $gid, $rdev, $sze)
                        = stat($tpname);        # get the real thing
        }
        # After this, tests of the form (-X _) give almost everything.

        if (-f $tpname) {
                $filecount++;
                if (m@^.*$R/(.*/)?pairtree.*$@o) {
                        ### print "$pdname PTREEFILE $tpname\n";
                        # xxx    if $verbose;
                        # else -prune ??
                }
                elsif (m@^.*$R/$P/[^/]+$@o) {
                        #print "m@.*$R/$P/[^/]+@: $_\n";
                        #print "$pdname UF $tpname\n";
                        print "error: corrupted pairtree at pairpath ",
                                "$pdname/: found unencapsulated file ",
                                "'$tpname' $homily\n";
                }
                else {
                        # xxx sanity check that $curobj is defined
                        $curobj{'octets'} += $sze;
                        ### print "sssss $curobj{'octets'}\n";
                        $curobj{'streams'}++;
                #       -fprintf $altout 'IN %p %s\n'
                #       $noprune
                }
        }
        elsif (-d $tpname) {
                $dircount++;
                if (m@^.*$R/(.*/)?pairtree.*$@o) {
                        #print "$pdname PTREEDIR $tpname\n";
                        # xxx if $verbose;
                #       -prune
                }
                # At last, we're entering a "regular" object.
                # XXXXXXX add re qualifier so Perl knows re's not changing
                elsif (m@^.*$R/($P/)?[^/]{$pairp1,}$@o) {
                        # start new object; but end previous object first
                        # form: ppath, EncapErr, octets, streams
                        $objectcount++;
                        pt_newobj($pdname, 0, 0, 0);
                        # print "$pdname NS $tpname\n";
                        #       -fprintf $altout 'START %h 0\n'
                        #       $noprune
                }
                elsif (m@^.*$R/$P$@o) {
                        #       -empty
                        # xxx if $verbose...    -printf '%p EP -\n'
                }
                # $pair, $pairm1, $pairp1
                # We have a post-morty encapsulation error
                elsif (m@^.*$R/([^/]{$pair}/)*[^/]{1,$pairm1}/[^/]{1,$pair}$@o) {
                        #print "$pdname PM $tpname\n";
                        print "error: corrupted pairtree at pairpath ",
                                "$pdname/: found '$tpname' after forced ",
                                "path ending $homily\n";
                                
                        #       -fprintf $altout 'START %h 0\n'
                        #       $noprune
                }
        }
        else {
                $irregularcount++;
        }
}