The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
package Koha::Contrib::Tamil::Zebra::Clouder;
{
  $Koha::Contrib::Tamil::Zebra::Clouder::VERSION = '0.021';
}
# ABSTRACT: Class generating keywords clouds from Koha Zebra indexes

use Moose;
use Carp;

extends 'AnyEvent::Processor';

my $MAX_OCCURENCE = 1000000000;

has koha => ( is => 'rw', isa => 'Koha::Contrib::Tamil::Koha' );

has index => (
    is => 'rw',
    isa => 'Str',
    trigger => sub {
        my ($self, $name) = @_;
        my $zc = $self->koha->zbiblio;
        eval {
            $zc->scan_pqf('@attr 1=' . $name . ' @attr 4=1 @attr 6=3 "a"');
        };
        croak "Invalid Zebra index: ", $name if $@;
        return $name;
    }
);

has levels_cloud => ( 
    is => 'rw', 
    isa => 'Int',
    default => 24
);

has max_terms => ( 
    is => 'rw', 
    isa => 'Int'
);

has number_of_terms => ( 
    is => 'rw', 
    isa => 'Int', 
    default => 0
); 

has terms => ( 
    is => 'rw', 
    isa => 'ArrayRef'
);

has min_occurence_index => (
    is => 'rw', 
    isa => 'Int', 
    default => -1
);

has min_occurence => ( 
    is => 'rw', 
    isa => 'Int', 
    default => 0
);

has from => (
    is => 'rw', 
    isa => 'Str', 
    default => '0'
);


#
# run 
#   Scan zebra index and populate an array of top terms
#
# PARAMETERS:
#   $max_terms    Max number of top terms
#
# RETURN:
#   A 4-dimensionnal array in $self->{terms}
#   [0] term
#   [1] term number of occurences
#   [2] term proportional relative weight in terms set E[0-1]
#   [3] term logarithmic relative weight E [0-levels_cloud]
#   
#   This array is sorted alphabetically by terms ([0])
#   It can be easily sorted by occurences:
#     @t = sort { $a[1] <=> $a[1] } @{$self->{top_terms}};
#
sub run {
    my $self = shift;
    $self->max_terms( shift );    
    $self->SUPER::run();
}


############################################################################
# Usage :   $zebra_index->process()
# Purpose : Do a Zebra ZOOM scan and keep top keywords
# Returns : TRUE(1) if end of index isn't reached, otherwise FALSE(0)
#
sub process {    
    my $self                = shift;
    my $max_terms           = $self->max_terms;    
    my $levels_cloud        = $self->levels_cloud;
    my $zbiblio             = $self->koha->zbiblio;
    my $number_of_terms     = $self->number_of_terms;
    my @terms               = $self->terms ? @{$self->terms} : ();
    my $min_occurence_index = $self->min_occurence_index;
    my $min_occurence       = $self->min_occurence;
    my $from                = $self->from;

    my $ss;
    SCAN:
    while (1) {
        eval {
            #print "$from\n" if $verbose;
            $from =~ s/\"/\\\"/g;
            my $query = '@attr 1=' . $self->index . ' @attr 4=1 @attr 6=3 "'
                        . $from . 'a"';
            $ss = $zbiblio->scan_pqf( $query );
        };
        if ($@) {
            chop $from;
            next SCAN;
        }
        last SCAN;
    }
    $ss->option( rpnCharset => 'UTF-8' );
    if ( $ss->size() == 0 ) { # End
        # Sort array of array by terms weight
        #@terms = sort { @{$a}[1] <=> @{$b}[1] } @terms;
    
        # A relatif weight to other set terms is added to each term
        my $min     = $terms[0][1];
        my $log_min = log( $min );
        my $max     = $terms[$#terms][1];
        my $log_max = log( $max );
        my $delta   = $max - $min;
        $delta = 1 if $delta == 0; # Very unlikely
        my $factor;
        if ($log_max - $log_min == 0) {
            $log_min = $log_min - $levels_cloud;
            $factor = 1;
        } 
        else {
            $factor = $levels_cloud / ($log_max - $log_min);
        }
    
        foreach my $term ( @terms ) {
            my $count      = @$term[1];
            my $weight     = ( $count - $min ) / $delta;
            my $log_weight = int( (log($count) - $log_min) * $factor);
            push @$term, $weight, $log_weight;
        }
        # Sort array of array by terms alphabetical order
        @terms = sort { @{$a}[0] cmp @{$b}[0] } @terms;
        $self->terms( \@terms );
        return 0;
    }
    else {
        my $term = '';
        my $occ = 0;
        $self->count( $self->count + $ss->size() );
        for my $index ( 0..$ss->size()-1 ) {
            ($term, $occ) = $ss->display_term($index);
            if ( $number_of_terms < $max_terms ) {
                push( @terms, [ $term, $occ ] ); 
                ++$number_of_terms;
                if ( $number_of_terms == $max_terms ) {
                    $min_occurence = $MAX_OCCURENCE;
                    for (0..$number_of_terms-1) {
                        my @term = @{ $terms[$_] };
                        if ( $term[1] <= $min_occurence ) {
                            $min_occurence       = $term[1];
                            $min_occurence_index = $_;
                        }
                    }
                }
            }
            else {
                if ( $occ > $min_occurence) {
                    @{ $terms[$min_occurence_index] }[0] = $term;
                    @{ $terms[$min_occurence_index] }[1] = $occ;
                    $min_occurence = $MAX_OCCURENCE;
                    for (0..$max_terms-1) {
                        my @term = @{ $terms[$_] };
                        if ( $term[1] <= $min_occurence ) {
                            $min_occurence       = $term[1];
                            $min_occurence_index = $_;
                        }
                    }
                }
            }
        }
        $self->number_of_terms(     $number_of_terms     ); 
        $self->terms(               \@terms              );
        $self->min_occurence_index( $min_occurence_index );
        $self->min_occurence(       $min_occurence       );
        $self->from(                $term                );
        return 1;
    }
}


sub process_message {
    my $self = shift;
    my $from = $self->from;
    $from = substr($from, 0, 70) . "..." if length($from) > 70;
    print sprintf("  %#6d", $self->count), " - ", $from, "\n";    
}


#
# Returns a HTML version of index top terms formated
# as a 'tag cloud'.
#
sub html_cloud {
    my $self = shift;
    my $koha_index = shift;
    my $withcss = shift;
    my @terms = @{ $self->terms() };
    my $html = '';
    if ( $withcss ) {
        $html = <<EOS;
<style>
.subjectcloud {
    text-align:  center; 
    line-height: 16px; 
    margin: 20px;
    background: #f0f0f0;
    padding: 3%;
}
.subjectcloud a {
    font-weight: lighter;
    text-decoration: none;
}
span.tagcloud0  { font-size: 12px;}
span.tagcloud1  { font-size: 13px;}
span.tagcloud2  { font-size: 14px;}
span.tagcloud3  { font-size: 15px;}
span.tagcloud4  { font-size: 16px;}
span.tagcloud5  { font-size: 17px;}
span.tagcloud6  { font-size: 18px;}
span.tagcloud7  { font-size: 19px;}
span.tagcloud8  { font-size: 20px;}
span.tagcloud9  { font-size: 21px;}
span.tagcloud10 { font-size: 22px;}
span.tagcloud11 { font-size: 23px;}
span.tagcloud12 { font-size: 24px;}
span.tagcloud13 { font-size: 25px;}
span.tagcloud14 { font-size: 26px;}
span.tagcloud15 { font-size: 27px;}
span.tagcloud16 { font-size: 28px;}
span.tagcloud17 { font-size: 29px;}
span.tagcloud18 { font-size: 30px;}
span.tagcloud19 { font-size: 31px;}
span.tagcloud20 { font-size: 32px;}
span.tagcloud21 { font-size: 33px;}
span.tagcloud22 { font-size: 34px;}
span.tagcloud23 { font-size: 35px;}
span.tagcloud24 { font-size: 36px;}
</style>
<div class="subjectcloud">
EOS
    }
    for (0..$#terms) {
        my @term = @{ $terms[$_] };
        my $uri = $term[0];
        $uri =~ s/\(//g;
        #print "  0=", $term[0]," - 1=", $term[1], " - 2=", $term[2], " - 3=", $term[3],"\n";
        $html = $html
            . '<span class="tagcloud'
            . $term[3]
            . '">'
            . '<a href="/cgi-bin/koha/opac-search.pl?q='
            . $koha_index
            . '%3A'
            . $uri
            . '">'
            . $term[0]
            . "</a></span>\n";
    }
    $html .= "</div>\n";
    return $html;
}

no Moose;

__PACKAGE__->meta->make_immutable;

1;



__END__
=pod

=encoding UTF-8

=head1 NAME

Koha::Contrib::Tamil::Zebra::Clouder - Class generating keywords clouds from Koha Zebra indexes

=head1 VERSION

version 0.021

=head1 AUTHOR

Frédéric Demians <f.demians@tamil.fr>

=head1 COPYRIGHT AND LICENSE

This software is Copyright (c) 2012 by Fréderic Démians.

This is free software, licensed under:

  The GNU General Public License, Version 3, June 2007

=cut