The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
##
#
#    Copyright 2001-2005, AllAfrica Global Media
#
#    This file is part of XML::Comma
#
#    XML::Comma is free software; you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation; either version 2 of the License, or
#    any later version.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    For more information about XML::Comma, point a web browser at
#    http://xml-comma.org, or read the tutorial included
#    with the XML::Comma distribution at docs/guide.html
#
##

package XML::Comma::Indexing::Clean;

@ISA = ( 'XML::Comma::NestedElement',
         'XML::Comma::SQL::DBH_User' );

use XML::Comma::SQL::DBH_User; 
use XML::Comma::Util qw( dbg );

use Carp ();
use strict;

# what to stick in the _comma_flag slots while we work
my $clean_flag = 2;


# _Clean_order_by
# _Clean_data_table_name
# _Clean_table_name
# _Clean_sort_spec
# _Clean_in_progress
# _Clean_doctype
# _Clean_indexname

sub init_and_cast {
  my ( $class, %args ) = @_;
  my $self = $args{element} || die "need an element";
  $self->{_Clean_doctype} = $args{doctype};
  $self->{_Clean_indexname} = $args{index_name};
  $self->{_Clean_order_by} = $args{order_by};
  $self->{_Clean_table_name} = $args{table_name} || die "need a table name";
  $self->{_Clean_data_table_name} = $args{data_table_name};
  $self->{_Clean_bcollection_table_names} =
    $args{bcollection_table_names} || [];
  $self->{_Clean_sort_spec} = $args{sort_spec} || '';
  # populate $self->{ _DBH } and $self->{ _DBH_pid } so later we can call 
  # get_dbh() without the initial _connect() overhead.
  $self->{_DBH} = $args{dbh} and $self->{_DBH_pid} = $$;
  return XML::Comma::SQL::DBH_User::decorate_and_bless ( $self, $class );
}

sub clean {
  my $self = shift();
  my $dbh = $self->get_dbh_writer();
  my $order_by = $self->{_Clean_order_by};
  my $data_table_name = $self->{_Clean_data_table_name};
  my $table_name = $self->{_Clean_table_name};

  # prepare the erase where clause. we want to eval it if the first
  # character is a '{', otherwise, leave it alone
  my $ewc = $self->element('erase_where_clause')->get();
  my $erase_where_clause;
  if ( $ewc and $ewc =~ m|^\s*\{| ) {
    $erase_where_clause = eval $ewc;
    if ( $@ ) { die "error preparing erase_where_clause '$ewc': $@\n" }
  } else {
    $erase_where_clause = $ewc;
  }
  #dbg 'erase_wc', $erase_where_clause || "<undef>";

  # don't clean if table _comma flag is set
  if ( $self->sql_get_table_comma_flag($dbh, $table_name) ) {
    print "skipping clean on $table_name...";
    return;
  }
  $self->{_Clean_in_progress} = 1;
  # set table _comma flag
  $self->sql_set_table_comma_flag ( $dbh, $table_name, $clean_flag );
  # for table we care about: clear all _comma flags
  $self->sql_clear_all_comma_flags ( $dbh, $table_name );
  # first pass clean: for sort tables removes orphan entries, for both
  # sort and data tables removes rows matching any erase_where_clause
  $self->sql_set_comma_flags_for_clean_first_pass
    ( $dbh, $data_table_name, $table_name, $erase_where_clause, $clean_flag );
  $self->sql_delete_where_comma_flags ( $dbh, $table_name, $clean_flag );
  # second pass clean: arranges rows in order and removes rows above
  # our to_size limit
  if ( my $size_limit = $self->element('to_size')->get() ) {
    $self->sql_set_comma_flags_for_clean_second_pass
      ( $dbh,
        $table_name,
        $self->{_Clean_order_by},
        $self->{_Clean_sort_spec},
        $self->{_Clean_doctype},
        $self->{_Clean_indexname},
        $size_limit,
        $clean_flag );
    $self->sql_delete_where_comma_flags ( $dbh, $table_name, $clean_flag );
  }
  # and if we have any bcollection tables to clean, do them, too. it's
  # pretty kludgy to do this here rather than in a separate chunk of
  # code, but that's okay. at least we know everything's already set
  # up if we just go ahead and clean the bcollection tables inside our
  # data table clean "envelope". so we're not going to set the table
  # comma flags, etc. the sql looping in here also ought to be
  # combined with the nearly-identical loop in
  # sql_set_comma_flags_for_clean_second_pass. finally, we assume that
  # there are bcollection_table_names in our local slot only if this
  # Clean was created to work on the data table (but we don't check
  # that, to make sure). so our $table_name is the data table
  # name. (see, I told you it was kludgy)
  foreach my $bctn ( @{$self->{_Clean_bcollection_table_names}} ) {
    $self->sql_clear_all_comma_flags ( $dbh, $bctn );
    my $sth = $dbh->prepare ( $self->sql_clean_find_orphans ($bctn, $table_name) );
    $sth->execute();
    while ( my $row = $sth->fetchrow_arrayref() ) {
      my $orphan_id = $row->[0];
      $dbh->do ( "UPDATE $bctn SET _comma_flag=$clean_flag WHERE doc_id="
                 . $dbh->quote($orphan_id) );
    }
    $self->sql_delete_where_comma_flags ( $dbh, $bctn, $clean_flag );
  }
  # unset comma flag
  $self->sql_unset_table_comma_flag ( $dbh, $table_name );
  $self->{_Clean_in_progress} = 0;
}

sub DESTROY {
  my $self = shift();
  if ( $self->{_Clean_in_progress} ) {
    # un-set table _comma flag
    $self->sql_unset_table_comma_flag( $self->get_dbh_writer(),
                                $self->{_Clean_table_name} );
  }
}

1;