The London Perl and Raku Workshop takes place on 26th Oct 2024. If your company depends on Perl, please consider sponsoring and/or attending.
##
#
#    Copyright 2001-2007, AllAfrica Global Media
#
#    This file is part of XML::Comma
#
#    XML::Comma is free software; you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation; either version 2 of the License, or
#    any later version.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    For more information about XML::Comma, point a web browser at
#    http://xml-comma.org, or read the tutorial included
#    with the XML::Comma distribution at docs/guide.html
#
##

package XML::Comma::Storage::Iterator;

use vars '$AUTOLOAD';

use strict;
use File::Find;
use XML::Comma::Util qw( dbg  );

# _Iterator_Store
# _Iterator_cached_list
# _Iterator_index
# _Iterator_direction
# _Iterator_last_doc
# _Iterator_newly_refreshed

###
### code to match iterator semantics
###

use overload bool => \&_iterator_has_stuff,
  '""' => sub { return $_[0] },
  '++' => \&_it_next,
  '--' => \&_it_prev,
  '=' => sub { return $_[0] };
# boy it'd sure be nice if we could do: while (my $doc = $it++) ...
#  '=' => sub { return $_[0]->read_doc };
# but it doesn't work, at least the way I tried it...

#these are only called by the overloading goo...
sub _it_next { return $_[0]->_it_advance(1);  }
sub _it_prev { return $_[0]->_it_advance(-1); }
sub _it_advance {
  my ($self, $amt) = @_;
  if($self->{_Iterator_newly_refreshed}) {
    $self->{_Iterator_newly_refreshed} = 0;
    # return true if there is anything in the iterator ("fake $it")
    return scalar @{$self->{_Iterator_cached_list}};
  }
  
  #the rest of this function is a lot like read_next (and next_id), except
  #it does a few things differently so we don't loop forever...
  $amt = -$amt if($self->{_Iterator_direction} eq '+');
  if($amt > 0) {
    $self->inc($amt) unless($self->{_Iterator_index} > $#{$self->{_Iterator_cached_list}}+1);
  } else {
    # -- is generally not useful, but $it-- shouldn't go below -1...
    $self->inc($amt) unless($self->{_Iterator_index} < 0);
  }
#  print "index: ", $self->{_Iterator_index}, "bla: ",
#    $#{$self->{_Iterator_cached_list}}, "\n";
  #if the index is greater than the size of the cached_list, return false
  return undef if ( $self->{_Iterator_index} > $#{$self->{_Iterator_cached_list}} );
  #this should never happen - see if($self->{_Iterator_newly_refreshed}) @ top of _it_advance
  # as well as _iterator_has_stuff
  die "congratulations, you found a bug in XML::Comma. Please report it: empty iterator?!?" unless(@{$self->{_Iterator_cached_list}});

  my $id = $self->{_Iterator_Store}->id_from_location
    ( $self->{_Iterator_cached_list}->[$self->{_Iterator_index}] );

  return $self->{_Iterator_last_doc} = XML::Comma::Doc->read
    ( type => $self->{_Iterator_Store}->doctype(),
      store => $self->{_Iterator_Store}->element('name')->get(),
      id => $id );
}


# this function is a bit of a lie... it is just here to provide "while(++$it)"
# it might not work with {prev|next}_read semantics, due to boundary condition
sub _iterator_has_stuff {
  my $self = $_[0];
  #return false if _Iterator_cached_list is empty
  return undef unless scalar @{$self->{_Iterator_cached_list}};
  return ( $self->{_Iterator_direction} eq '-' ) ? 
    ( $self->{_Iterator_index} <= $#{$self->{_Iterator_cached_list}} ) :
    ( $self->{_Iterator_index} >= 0 );
}

sub read_doc {
  my ($self, %args) = @_;
  return $self->{_Iterator_last_doc} || (( $self->{_Iterator_direction} eq '-' ) ?
    $self->next_read(%args) : $self->prev_read(%args)); 
}

sub retrieve_doc {
  my $self = shift;
  my $doc = $self->read_doc(@_);
  $doc->get_lock();
  return $doc; 
}

# alias doc_(read && retrieve) to (read && retrieve)_doc 
# for API consistancy

*doc_retrieve = \&retrieve_doc;
*doc_read     = \&read_doc;

###
### /code to match iterator semantics
###

# store=>, size=>, pos=>
sub new {
  my ( $class, %arg ) = @_;
  my $self = {};
  # which store
  my $store = $self->{_Iterator_Store} = $arg{store} ||
    die "Storage Iterator needs a store to iterate across\n";
  my $extension = $store->extension() ||
    die "Storage Iterator requires a Store that provides an extension\n";
  # where do we start and how much do we want?
  my $size = $arg{size} || 0xffffffff;
  my $pos = $self->{_Iterator_direction} = $arg{pos} || '+';
  # build the cached list of locations -- we post-sort the results in
  # chunks again because find's preprocess block doesn't actually sort
  # the file contents of a directory;
  my $temp_by_dir = {};
  my $total_pushed = 0;
#  warn "pos: $pos, size: $size\n";
  find ( { preprocess => sub { 
#             warn "PREPROCESS SORT: @_ WILL BECOME: ", join(" ",
#               ($pos eq '+') ? sort @_ : reverse sort @_), "\n";
             return () if $total_pushed > $size;
             return ($pos eq '-') ? sort @_ : reverse sort @_;
           },
           wanted => sub {
             push ( @{$temp_by_dir->{$File::Find::dir}}, $File::Find::name )
               if  m|$extension$|;
           },
           postprocess => sub {
             $total_pushed += scalar ( @{$temp_by_dir->{$File::Find::dir}} )
               if  defined $temp_by_dir->{$File::Find::dir};
           }
         }, $store->base_directory()
       );

# useful in debugging sort order problems above
#  foreach my $k (sort keys %$temp_by_dir) {
#    warn "hmm: $k -> ", @{$temp_by_dir->{$k}}, "\n";
#  }

  # post-sort and set where we're starting from and our actual length
  if ( $pos eq '-' ) {
    map { push @{$self->{_Iterator_cached_list}}, @{$temp_by_dir->{$_}} }
      sort keys %{$temp_by_dir};
    $#{$self->{_Iterator_cached_list}} = $size-1  if
      ($size-1) < $#{$self->{_Iterator_cached_list}};
    $self->{_Iterator_index} = -1;
  } else {
    map { push @{$self->{_Iterator_cached_list}}, @{$temp_by_dir->{$_}} }
      reverse sort keys %{$temp_by_dir};
    if ( ($size-1) < $#{$self->{_Iterator_cached_list}} ) {
      $#{$self->{_Iterator_cached_list}} = $size-1;
    }
    @{$self->{_Iterator_cached_list}} =
      reverse @{$self->{_Iterator_cached_list}};
    $self->{_Iterator_index} = $#{$self->{_Iterator_cached_list}} + 1;
  }
#  foreach my $i (@{$self->{_Iterator_cached_list}}) {
#    warn "- $i\n";
#  }
  $self->{_Iterator_newly_refreshed} = 1; #for overloading
  # bless and return
  bless ( $self, $class );
  return $self;
}

sub length {
  return $#{$_[0]->{_Iterator_cached_list}} + 1;
}

sub index {
  return $_[0]->{_Iterator_index};
}

sub inc {
  return $_[0]->{_Iterator_index} += $_[1] || 1;
}

sub set {
  return $_[0]->{_Iterator_index} = $_[1];
}

sub next_id {
  my $self = shift();
  return if ( $self->{_Iterator_index} >= $#{$self->{_Iterator_cached_list}} );
  $self->inc();
  return $self->{_Iterator_Store}->id_from_location
    ( $self->{_Iterator_cached_list}->[$self->{_Iterator_index}] );
}

sub prev_id {
  my $self = shift();
  return  if ( $self->{_Iterator_index} <= 0 );
  $self->inc(-1);
  return $self->{_Iterator_Store}->id_from_location
    ( $self->{_Iterator_cached_list}->[$self->{_Iterator_index}] );
}

sub next_retrieve {
  my $id = $_[0]->next_id() || return;
  return $_[0]->{_Iterator_last_doc} = XML::Comma::Doc->retrieve 
    ( type => $_[0]->{_Iterator_Store}->doctype(),
      store => $_[0]->{_Iterator_Store}->element('name')->get(),
      id => $id );
}

sub prev_retrieve {
  my $id = $_[0]->prev_id() || return;
  return $_[0]->{_Iterator_last_doc} = XML::Comma::Doc->retrieve 
    ( type => $_[0]->{_Iterator_Store}->doctype(),
      store => $_[0]->{_Iterator_Store}->element('name')->get(),
      id => $id );
}

sub next_read {
  my ($self, %args) = @_;
  my $id = $self->next_id() || return;
  return $self->{_Iterator_last_doc} = XML::Comma::Doc->read 
    ( type => $self->{_Iterator_Store}->doctype(),
      store => $self->{_Iterator_Store}->element('name')->get(),
      id => $id, %args );
}

sub prev_read {
  my ($self, %args) = @_;
  my $id = $self->prev_id() || return;
  return $self->{_Iterator_last_doc} = XML::Comma::Doc->read
    ( type => $self->{_Iterator_Store}->doctype(),
      store => $self->{_Iterator_Store}->element('name')->get(),
      id => $id, %args );
}

sub doc_id {
  my $location = $_[0]->{_Iterator_cached_list}->[$_[0]->{_Iterator_index}];
  return $location ?
    $_[0]->{_Iterator_Store}->id_from_location($location) : 
    #the below is slow, but we only do it once per iteration. it's
    #needed in particular for storage iterator dispatch stuff
    $_[0]->read_doc->doc_id;
}

sub to_array {
  my ( $self, %args ) = @_; 
  my @docs = ();
  while($self++) {
    push @docs, $self->read_doc;
    #push @docs, XML::Comma::VirtualDoc->new($self); 
  }
  return @docs;
} 

####
# AUTOLOAD
#
#
####

sub AUTOLOAD {
  my ( $self, @args ) = @_;
  # strip out local method name and stick into $m
  $AUTOLOAD =~ /::(\w+)$/;  my $m = $1;
  return $self->iterator_dispatch ( $m, @args );
}

#note this is NOT slow, because read_doc does caching.
sub iterator_dispatch {
  my ( $self, $m, @args ) = @_;
  # if we're here, make sure next call to ++$it really does
  # advance
  $self->{_Iterator_newly_refreshed} = 0;

#warn "m: $m, args: ".join(" ", @args)."\n";
#warn "self: $self\n";
  my $doc = $self->read_doc();
#warn "doc: $doc\n";
  return $doc->$m(@args);
}

sub DESTROY { }


1;