# The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
use strict;
use warnings;
use Test::More tests => 8 ;

#use locale;
# (just for these tests, don't use locale so that the results are not
#   platform-dependent). 

BEGIN {use_ok("Search::Indexer");}

# Test corpus: maps a numeric document id to the text of that document
# (short verse fragments in English, Italian, German and French).
# NOTE: the whitespace inside these strings (tabs, runs of spaces,
# newlines, trailing padding) is significant -- the expected excerpts in
# $tsts reproduce it literally, so do not re-indent or trim these lines.
my $docs = {

  1 => qq{	Along the city streets
		It is still high tide,
                Yet the garrulous waves of life
		Shrink and divide
		With a thousand incidents
  		Vexed and debated:-
 		This is the hour for which we waited -                },

  2 => qq{	This is the ultimate hour
		When life is justified.
		The seas of experience
		That were so broad and deep
		So immediate and steep,
		Are suddenly still.
		You may say what you will,
		At such peace I am terrified.
		There is nothing else beside.                         },

#===================

  3 => qq{	Within this restless, hurried, modern world
		  We took our hearts' full pleasure - You and I,
  		And now the white sails of our ship are furled,
                  And spent the lading or our argosy.                 },

  4 => qq{ 	Wherefore my cheeks before their time are wan,
                  For very weeping is my gladness fled,
  		Sorrow has paled my young mouth's vermilion,
                  And Ruin draws the curtains of my bed.              },

  5 => qq{	But all this crowded life has been to thee
                  No more than lyre, or lute, or subtle spell
  		Of viols, or the music of the sea
                  That sleeps, a mimic echo, in the shell.            },

#====================

  6 => qq{	Come again:
		That I may cease to mourn
		Through thy unkind disdain
		For now left and forlorn
		I sit, I sigh, I weep, I faint, I die,
		In deadly pain and endless misery.                    },

  7 => qq{	Gentle love,
		Draw forth thy wounding dart,
		Thou canst not pierce her heart
		For I that to approve
		By sights and tears more hot than are thy shafts,
		Did tempt while she for mighty triumph laughs.        },

#====================

  8 => qq{	Rendete agli occhi miei, o fonte, o fiume,
		L'onde della non vostra salda vena,
		Che piu v'innalza e cresce, e con piu lena
		Che non e'l vostro natural costume                    },

  9 => qq{	E tu, folt'aria, che 'l celeste lume
		Porgi ai tristi occhi, dei sospir miei piena,
		Rendi questi al cor lasso, e rasserena
		Tua scura faccia, e'l pur tuo s'allume.               },

  10 => qq{	Renda la terra l'orme alle mie piante,
		L'erba, rigermogliando, che l'e tolta,
		Il suono eco infelice a' miei lamenti;                },

  11 => qq{	Gli sguardi agli occhi miei tue luci sante,
		Ch'io possa altra bellezza un'altra volta
		Amar, se sdegni i miei desiri ardenti                 },

#====================

  12 => qq{	Von Himmel hoch da komm ich her,
		Ich bring' euch gute neue Mähr,
		Der guten Mähr bring ich so viel,
		Davon ich sing'n und sagen will.                      },
 
  13 => qq{	Euch ist ein Kindlein heut' gebor'n 
                Von einer Jungfrau auserkor'n,
		Ein Kindelein so zart und fein,
		Das soll eu'r Freund und Wonne sein.                  },

#====================

  14 => qq{	Oui, ce monde est bien plat; quant à l'autre, sornettes,
		Moi, je vais, résigné, sans espoir, à mon sort
		Et pour tuer le temps, en attendant la mort,
		Je fume, au nez des Dieux, de fines cigarettes.       },

  15 => qq{	Allez, vivants, luttez, pauvres futurs squelettes !
		Moi, le méandre bleu qui vers le ciel se tord
		Me plonge en une extase infinie et m'endort
		Comme aux parfums mourants de mille cassolettes.      },

  16 => qq{	Et j'entre au paradis, fleuri de rêves clairs,
		Où viennent se mêler en valses fantastiques
		Des éléphants en rut à des choeurs de moustiques      },

  17 => qq{	Et puis, quand je m'éveille en songeant à mes vers
		Je contemple, le coeur plein d'une douce joie
		Mon cher pouce rôti comme une cuisse d'oie.           }

};



# Expected results: maps a query string to a hash { docId => [excerpts] }.
# For each query, the test loop further down calls search(), builds the
# excerpts of every document listed in the returned scores, and compares
# the resulting hash against the entry below with is_deeply.
# In the expected excerpts, matched words are wrapped in <b>...</b> and
# each excerpt is delimited by '...'. The strings must reproduce the
# whitespace of the $docs texts exactly, so do not re-indent them.
my $tsts = {

 'life' =>			# just a word
 {'1' => ['...        Yet the garrulous waves of <b>life</b>
		Shrink and divide
		With a thous...'                                      ],
  '2' => ['...	This is the ultimate hour
		When <b>life</b> is justified.
		The seas of experi...'                                ],
  '5' => ['...	But all this crowded <b>life</b> has been to thee
                 ...'                                                 ]},


 'garrulous OR argosy' =>	# did you know those ?
 {'1' => ['...high tide,
                Yet the <b>garrulous</b> waves of life
		Shrink and divide
...'                                                                  ],
  '3' => ['...       And spent the lading or our <b>argosy</b>.                 ...'                                                                 ]},


 '"it is still"' =>		# a sequence of words
 {'1' => ['...	Along the city streets
		<b>It is still</b> high tide,
                Yet the...'                                           ],
  '2' => []},  # wrong; indexer was fooled because 'it' and 'is' are stopwords

 '"occhi miei"' =>		# another sequence
 {'8' => ['...	Rendete agli <b>occhi miei</b>, o fonte, o fiume,
		L\'onde della ...'                                    ],
  '11' => ['...	Gli sguardi agli <b>occhi miei</b> tue luci sante,
		Ch\'io possa altr...'                                 ]},


 '(gute ODER guten) UND Mähr' => # boolean combination
 {'12' => ['...da komm ich her,
		Ich bring\' euch <b>gute</b> neue <b>Mähr</b>,
		Der <b>guten</b> <b>Mähr</b> bring ich so viel,
		Davon ich sin...'                                     ]},


 '+(je j moi) -mon' =>		# booleans through prefixes
 {'16' => ['...	Et <b>j</b>\'entre au paradis, fleuri de rêves ...'   ],
  '15' => ['...tez, pauvres futurs squelettes !
		<b>Moi</b>, le méandre bleu qui vers le ciel ...'     ]}

};


# Remove index databases left over from a previous run.
unlink glob('*.bdb');

# Create the indexer in write mode.
# (Direct method call instead of the indirect-object form "new Class(...)",
# which is ambiguous to parse and disabled under recent feature bundles.)
my $indexer = Search::Indexer->new(

    writeMode => 1,

    # just a couple of examples of stopwords
    stopwords => [qw(a i o or of it is and are my the)],

    # explicit setup of wregex : needed here to be sure to have the same
    # results on every platform (the default qr/\w+/ would be
    # locale-dependent).
    wregex    => qr/[a-zçáàâäéèêëíìîïóòôöúùûüýÿ]+/i,
);

# Index all documents. Ids are visited in numeric order so that every run
# feeds the indexer in the same sequence (hash key order is not stable).
foreach my $doc_id (sort { $a <=> $b } keys %$docs) {
    $indexer->add($doc_id, $docs->{$doc_id});
}

# Run every query and compare the excerpts of all matching documents with
# the expected structure. Queries are sorted so the TAP output has a
# reproducible order from one run to the next.
foreach my $query (sort keys %$tsts) {
    my $result = $indexer->search($query);

    # docId => excerpts, for each document present in the result scores
    my %excerpts;
    foreach my $doc_id (keys %{ $result->{scores} }) {
        $excerpts{$doc_id}
            = $indexer->excerpts($docs->{$doc_id}, $result->{regex});
    }
    is_deeply(\%excerpts, $tsts->{$query}, $query);
}

# Words in the index starting with a given prefix. The call is kept in
# scalar context, as before; is_deeply reports the differing element on
# failure, which ok(eq_array(...)) did not.
my $words_sa = $indexer->words("sa");
is_deeply($words_sa, [qw(sagen sails salda sans sante say)],
          "words starting with 'sa'");

# Release the indexer before deleting its files.
# NOTE(review): presumably the object keeps the *.bdb files open until it
# is destroyed, which makes unlink fail on some platforms -- confirm.
undef $indexer;

unlink glob('*.bdb');    # remove index databases