lib/AI/MicroStructure/Memorizer.pm

#!/usr/bin/perl -W
package AI::MicroStructure::Memorizer;
use strict;
use warnings;
use Digest::MD5 qw(md5 md5_hex md5_base64);
use File::Spec;
use Data::Dumper;
use Search::ContextGraph;
use Cache::Memcached::Fast;
my $count;
my %known_ips;
my %known_names;
my @output;
my @thread;
my $thread_support;
my @common_cnames;
my $threads;
my $book= "";
$count = 0;

# try and load thread modules, if it works import their functions
BEGIN {
  eval {
    require threads;
    require threads::shared;
    require Thread::Queue;
    $thread_support = 1;
  };
  if ($@) { # got errors, no ithreads :(
    $thread_support = 0;
  } else { #safe to haul in the threadding functions
    import threads;
    import threads::shared;
    import Thread::Queue;
  }
}

# turn errors back on
BEGIN {
  $SIG{__DIE__}  = 'DEFAULT';
  $SIG{__WARN__} = 'DEFAULT';
}


sub threadSupport{
    my $self = shift;
    my $bookpath = shift;

  my @books = $self->getBookList($bookpath);
  my $buffer={};

  if ($threads) {
    share($count);
    share(%known_ips);
    share(%known_names);
    share(@output);
    my $stream = new Thread::Queue;
    foreach my $book (@books) {

      $stream->enqueue($book);
    }
    for (0..$threads) {
      my $kid = new threads($self->store($book), $stream);
      $stream->enqueue(undef); #for each thread
    }
    foreach my $thread (threads->list) {
     $thread->join if ($thread->tid && !threads::equal($thread, threads->self));
    }
  } else {
    foreach my $book (@books) {
      $self->search($book);
    }
  }
  # write to file any output generated by child threads
  print  @output;

}


sub search {

  my $self= shift;
  my $namehex = "";
  my $upstream;
  my $tid;
  my $resbf;
  if ($threads && threads->self->tid()) {
    $upstream = shift;
    $tid      = threads->self->tid();
    $resbf    = 1;
    warn $tid;
  } else {
    $resbf = 0;
  }

my $i=0;
my  $returns = {};
my  $content = {};
my $name="";
      # only runs once if not threaded
  while (my $search_item = $threads ? $upstream->dequeue : shift) {

    $namehex = md5_hex($search_item);

    next if($search_item eq "." || $search_item eq "..");

    $content = $self->{cache}->retrieve($namehex);
    $name = sprintf("%s",$search_item);

    if(defined($content->{body})){

      $returns->{$namehex} =
        {subject => $content->{subject},
         body    => $content->{body},
         name    => $name,
         md5hex  => $namehex};

    }else{

      $content  = $self->catfile($_);

      $returns->{$namehex} =
        {subject=>$content->{subject},
         body=>$content->{body},
         name=>$name,
         md5hex=>$namehex};

      $self->{cache}->store($namehex,$content);

    }


    $self->{booknames}->{$namehex}=$name;

    $self->{chaps}->{$namehex} = {subject=>$content->{subject},
                      body=>$content->{body}};
  # print $search_item."\n";
    return  $self->{chaps};


  }
}


sub new {
  my $this = shift;
  my $class = ref($this) || $this;
  my $self = {};
  bless $self, $class;
  $self->initialize(@_);

  return $self;
}

sub initialize {
  my $self = shift;
  %$self=@_;

  $self->{cache} =   new Cache::Memcached::Fast({
      servers => [ { address => 'localhost:11211',
                     weight => 2.5 }],
      namespace => 'my:',
      connect_timeout => 0.2,
      io_timeout => 0.5,
      close_on_error => 1,
      compress_threshold => 100_000,
      compress_ratio => 0.9,
      compress_methods =>
       [ \&IO::Compress::Gzip::gzip,
         \&IO::Uncompress::Gunzip::gunzip ],
      max_failures => 3,
      failure_timeout => 2,
      ketama_points => 150,
      nowait => 1,
      hash_namespace => 1,
      serialize_methods => [ \&Storable::freeze,
                             \&Storable::thaw ],
      utf8 => ($^V ge v5.8.1 ? 1 : 0),
      max_size => 4*512 * 1024,
  });




#AI::MicroStructure::Driver::Memcached->new();


  $self->{context} = Search::ContextGraph->new( use_file  => $self->{contextgraph}, auto_reweight => 0);

}


sub trim
{
  my $self = shift;
  my $string = shift;
  $string =  "" unless  $string;
  $string =~ s/^\s+//;
  $string =~ s/\s+$//;
  $string =~ s/\t//;
  $string =~ s/^\s//;
  return $string;
}


sub catfile{
  my $self = shift;
  my $file=shift;
     return unless($file);

my $path = sprintf("%s/%s",$self->{bookpath},$file);
#  print $path;
  my $cat = {};
  my @cat = map{
               my @x = split(":",$_);
                  $_ = $self->trim($x[1]);
               }split("\n",
    `microdict $path | data-freq --limit 500`);

  $cat->{subject} = [@cat[0..10]];
  $cat->{body}    = [@cat];

  return $cat;

}



sub store{

my $self = shift;
my $xname = shift;
my    $content = {};
my    $namehex = md5_hex($xname);
      $content = $self->{cache}->retrieve($namehex);


    if(defined($content->{body})){

      $self->{returns}->{$namehex} =
        {subject => $content->{subject},
         body    => $content->{body},
         name    => $xname,
         md5hex  => $namehex};

    }else{

      $content  = $self->catfile($_);

      $self->{returns}->{$namehex} =
        {subject=>$content->{subject},
         body=>$content->{body},
         name=>$xname,
         md5hex=>$namehex};

      $self->{cache}->store($namehex,$content);

    }



}
sub getBookList{

  my $self = shift;
  my $dir  = shift;

  $dir = $self->{bookpath}      unless defined($dir);
  die "$dir is not a directory" unless -d $dir;
  opendir(DIR, $dir) or die $!;

  my @mp3s = grep { $_ = sprintf("%s",$_);  }
              sort grep /^[\x20-\x7E]+$/,
              readdir(DIR);

  closedir DIR;

  return @mp3s;
}

sub analyseBookNames{

  my $self = shift;
  my $bookpath = shift;
  my $returns = {};
  my @books = $self->getBookList($bookpath);
  my @data = ();
  my $content = {};
  my $name = "";
  my $namehex = "";



  foreach(@books) {
    $content = {};
    $namehex = md5_hex($_);

    next if($_ eq "." || $_ eq "..");

    $content = $self->{cache}->retrieve($namehex);
    $name = sprintf("%s",$_);

    if(defined($content->{body})){

      $returns->{$namehex} =
        {subject => $content->{subject},
         body    => $content->{body},
         name    => $name,
         md5hex  => $namehex};

    }else{

      $content  = $self->catfile($_);

      $returns->{$namehex} =
        {subject=>$content->{subject},
         body=>$content->{body},
         name=>$name,
         md5hex=>$namehex};

      $self->{cache}->store($namehex,$content);

    }

    next if($name=~/dvd/);

    $self->{booknames}->{$namehex}=$name;

    $self->{chaps}->{$namehex} = {subject=>$content->{subject},
                      body=>$content->{body}};


  }


  return $returns;

}




sub perform_standard_tests {

  my $self = shift;
     $self->analyseBookNames();

  my $booklist = $self->{chaps};

   foreach my $k ( reverse sort { $b cmp $a } keys %{$self->{chaps}}){
   if(defined($k)){
      my @sub = @{$booklist->{$k}->{subject}};
      my @body = @{$booklist->{$k}->{body}};


      $self->{out} .= sprintf("%s\t[s=%d,b=%d]\t%s\n",$k,$#sub,$#body,$self->{booknames}->{$k});


      if($#body>0){
        $self->{context}->add_document($self->{booknames}->{$k}, \@body);
      }
    }
  }


  return $self->{chaps};


}
sub getTermList{
  my $self = shift;
  return $self->{context}->term_list;


}

sub query_simple_search{

    my $self = shift;

    my $term = shift;

    return $self->{context}->simple_search($term);

}



sub query_similareDocs {

        my $self = shift;
        my $term = shift;

        $self->{out}="";

        my @ranked_docs = $self->simple_search($term);

        my ($s1,$s2) = $self->{context}->find_similar(@ranked_docs);

        #print Dumper $s2;
        foreach (sort {$a cmp $b} keys %$s1)
        {
          next unless($s1->{$_}>=0.75);

          $self->{out} .=  sprintf("\n%s = %s",$_, $s1->{$_});
        }

          $self->{out} .=  sprintf "\n<br>\n<b>compute strong similar terms</b>";

        #print Dumper $s2;
        foreach (sort {$a cmp $b} keys %$s2)
        {
          next unless($s2->{$_}>=0.75);

        $self->{out} .=  sprintf("\n%s = %s",$_, $s2->{$_});
        }


$self->{out} .=  sprintf "\n<br>\n<b>worndnet suport / compute strong similar terms</b>";

        #print Dumper $s2;
        foreach (sort {$a cmp $b} keys %$s2)
        {
          next unless($s2->{$_}>=0.75);

$self->{out} .=  sprintf("\n(%s%s)\n%3.3f",sprintf($self->trim(`micro-wnet $_`)),$_,$s2->{$_});
        }


    return $self->{out};
}

sub query_simple_intersection{

    my $self = shift;

    my @terms = @_;

    my @ranked_docs = $self->{context}->intersection(terms=>@terms);

    return \@ranked_docs;
}

sub query_simple_mixed{

    my $self = shift;

    my @terms =  shift;

    my ( $docs, $words ) = $self->{context}->mixed_search(
        { docs=>[$self->{context}->doc_list],
         terms => [@terms]});

    return ( $docs, $words );

}

sub sampleRun{

    my $self = shift;
    my $intrest = {};


    $intrest->{base}="physics";
    $intrest->{second}=lc `micro`;

    $intrest->{base}=~s/_|\n/ /g;
    $intrest->{second}=~s/_|\n/ /g;

    my @ranked_docs = $self->{context}->simple_search($intrest->{base});



    my $out = "";
$self->{out} .=  sprintf "<hr/><h1>%s only</h1>\n",$intrest->{base};

  printf Dumper [
      $self->{context}->intersection(
        terms => [$intrest])];

$self->{out} .=  sprintf "<hr/><h1>%s + %s</h1>\n",$intrest->{base},$intrest->{second};

printf Dumper [
      $self->{context}->intersection(
        terms => [$intrest->{base},$intrest->{second}])];


$self->{out} .=  sprintf "<hr/><h1>%s + %s + synthetic</h1>\n",$intrest->{base},$intrest->{second};
$self->{out} .=  sprintf Dumper [
      $self->{context}->intersection(
        terms => [$intrest->{base},$intrest->{second}] )];

$self->{out} .=  sprintf "<hr/><h1>drill down</h1>\n\n";


    @ranked_docs = $self->{context}->intersection(
      terms=>[$intrest->{base},$intrest->{second}]);

$self->{out} .=  sprintf
      "<hr/><h1>intersection[programming,computer]".
      "</h1>\n\n",
      join "<br />\n",@ranked_docs;

$self->{out} .=  sprintf "<hr/><h1>mixed_search on intersection</h1>\n\n";

    my ( $docs, $words ) =
      $self->{context}->mixed_search(
        {docs=>[@ranked_docs],
         terms => ['genetic',
                   'dreaming',
                   'evolutionary']});

        my $similarToAi = {};

         # Print out result set of returned documents
         foreach my $k ( sort { $docs->{$b} <=> $docs->{$a} }
             keys %{ $docs } ) {

             if($docs->{$k}>=100.01) {

            $self->{out} .=  sprintf "alpha\trelevance ",
                  $docs->{$k},"\tDoc ",$k,"\n";

            }elsif($docs->{$k}<=101.0 && $docs->{$k}>=100.0){

            $self->{out} .=  sprintf "dominant\trelevance",
                   $docs->{$k},"\tDoc ",$k,  "\n";

            }
         }

        push @ranked_docs , $self->{context}->simple_search({
          docs=>[$self->{context}->doc_list] ,
          terms=>['programming']});


#        print Dumper @ranked_docs;

      $self->{out} .=   sprintf "<hr/><h1>simple_search on fuzzy $#ranked_docs</h1>".
              "\n<br>\n<b>compute strong similar docs</b>";

        #$self->{context}->reweight_graph();

        my ($s1,$s2) = $self->{context}->find_similar(@ranked_docs);

        #print Dumper $s2;
        foreach (sort {$a cmp $b} keys %$s1)
        {
          next unless($s1->{$_}>=0.75);

$self->{out} .=  sprintf("\n%s = %s",$_, $s1->{$_});
        }

$self->{out} .=  sprintf "\n<br>\n<b>compute strong similar terms</b>";

        #print Dumper $s2;
        foreach (sort {$a cmp $b} keys %$s2)
        {
          next unless($s2->{$_}>=0.75);

        $self->{out} .=  sprintf("\n%s = %s",$_, $s2->{$_});
        }


$self->{out} .=  sprintf "\n<br>\n<b>worndnet suport / compute strong similar terms</b>";

        #print Dumper $s2;
        foreach (sort {$a cmp $b} keys %$s2)
        {
          next unless($s2->{$_}>=0.995);

$self->{out} .=  sprintf("\n(%s=%3.3f)",$_,$s2->{$_});
        }

return $self->{out};

    }

sub getDocList{
  my $self = shift;
  return $self->{context}->doc_list;

}


1;

# ABSTRACT: this is part of AI-MicroStructure the package handels relations between concepts


=head1 NAME

  AI::MicroStructure::Memorizer

=head1 DESCRIPTION

  Gets Relations for Concepts based on  words

=head1 SYNOPSIS


  ~$ micro new world

  ~$ micro structures

  ~$ micro any 2

  ~$ micro drop world

  ~$ micro


=head1 AUTHOR

  Hagen Geissler <santex@cpan.org>

=head1 COPYRIGHT AND LICENCE

  Hagen Geissler <santex@cpan.org>

=head1 SUPPORT AND DOCUMENTATION

  ☞ [sample using concepts](http://quantup.com)

  ☞ [PDF info on my works](https://github.com/santex)


=head1 SEE ALSO

  AI-MicroStructure
  AI-MicroStructure-Cache
  AI-MicroStructure-Deamon
  AI-MicroStructure-Relations
  AI-MicroStructure-Concept
  AI-MicroStructure-Data
  AI-MicroStructure-Driver
  AI-MicroStructure-Plugin-Pdf
  AI-MicroStructure-Plugin-Twitter
  AI-MicroStructure-Plugin-Wiki

=cut

__END__
__DATA__


package main;
use Data::Printer;
my $configure = {};
my $memo   = AI::MicroStructure::Memorizer->new();
$memo->getBookList();
p $memo
__DATA__


  new Cache::Memcached::Fast({
      servers => [ { address => 'localhost:11211',
                     weight => 2.5 }],
      namespace => 'my:',
      connect_timeout => 0.2,
      io_timeout => 0.5,
      close_on_error => 1,
      compress_threshold => 100_000,
      compress_ratio => 0.9,
      compress_methods =>
       [ \&IO::Compress::Gzip::gzip,
         \&IO::Uncompress::Gunzip::gunzip ],
      max_failures => 3,
      failure_timeout => 2,
      ketama_points => 150,
      nowait => 1,
      hash_namespace => 1,
      serialize_methods => [ \&Storable::freeze,
                             \&Storable::thaw ],
      utf8 => ($^V ge v5.8.1 ? 1 : 0),
      max_size => 4*512 * 1024,
  });
	Global
`s`	Focus search bar
`?`	Bring up this help dialog
	GitHub
`g` `p`	Go to pull requests
`g` `i`	go to github issues (only if github is preferred repository)
	POD
`g` `a`	Go to author
`g` `c`	Go to changes
`g` `i`	Go to issues
`g` `d`	Go to dist
`g` `r`	Go to repository/SCM
`g` `s`	Go to source
`g` `b`	Go to file browse
	Search terms
module: (e.g. module:Plugin)
distribution: (e.g. distribution:Dancer auth)
author: (e.g. author:SONGMU Redis)
version: (e.g. version:1.00)