The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
/*
 * WuManber.xs
 * Copyright (c) 2007-2010, Juergen Weigert, Novell Inc.
 * This module is free software. It may be used, redistributed
 * and/or modified under the same terms as Perl itself.
 *
 * see perldoc perlxstut
 * see Rolf Stiebe; Textalgoritmen WS 2005/6
 * see TR94-17_WuManber.pdf
 */

#include "EXTERN.h"
#include "perl.h"
#include "XSUB.h"

#include "wumanber_impl.h"


static void push_result(unsigned int idx, unsigned long offset, void *data)
{
  AV *r = (AV *)data;

  // In perl, indices run from 0..n_pat-1
  // In C, indices run from 1..n_pat

#if 1
  AV *loc = (AV *)sv_2mortal((SV *)newAV());
  av_push(loc, newSVnv(offset));
  av_push(loc, newSVnv(idx-1));
  av_push(r, newRV((SV *)loc));
#else
  av_push(r, newSVnv(offset));
  av_push(r, newSVnv(idx-1));
#endif
}

MODULE = Search::WuManber	PACKAGE = Search::WuManber
PROTOTYPES: ENABLE

#define BLOCK_SIZE 3
#define HASH1_SIZE 0x10
int
init_tables(obj)
    HV *obj

  PREINIT:
    AV* p;
    SV** pp;
    SV **svp;
    int i, n_patterns;
    unsigned char **pattern_list;
    unsigned int case_sensitive;

  INIT:
    // init PAT table
    pp = hv_fetch(obj, "patterns", 8, 0);
    if (!pp) croak("init_tables: no patterns in obj\n");

    // next test needed to avoid segv
    if (SvTYPE(SvRV(*pp)) != SVt_PVAV) croak("init_tables: patterns not an ARRAY-ref\n");
    p = (AV *)SvRV(*pp);
    n_patterns = av_len(p);
    pattern_list = (unsigned char **)calloc(sizeof(unsigned char *), n_patterns+2);

    svp = hv_fetch(obj, "case_sensitive", 14, 0); 
    if (!svp) croak("init_tables: no 'case_sensitive' in obj\n");
    case_sensitive = SvUV(*svp);

  CODE:
    i = 0;
    while (i++ <= n_patterns)
      {
        SV** ep = av_fetch(p, i-1, 0);
	STRLEN slen;
	unsigned char *e;

	// next test not really needed. perl converts almost anything to string.
	if (!SvPOK(*ep)) croak("init_tables: pattern[%d] is not a string\n", i);
	pattern_list[i] = e = (unsigned char *)SvPV(*ep, slen);

        // printf("pattern[%d] = '%s'\n", i, e);
      }
    pattern_list[i] = NULL;	// just to be sure

    struct WuManber *wm = (struct WuManber *)calloc(1, sizeof(struct WuManber));
    wm->progname = "perl(Search::WuManber)";
    prep_pat(wm, n_patterns+1, pattern_list, !case_sensitive);

    // FIXME: this needs a destructor, to free the memory bound in wm's pointers.
    (void)hv_store(obj, "wm", 2, newSVpvn((char *)wm, sizeof(*wm)), 0); 

    (void)hv_store(obj, "BLOCK_SIZE", 9, newSViv(wm->use_bs1?1:(wm->use_bs3?3:2)), 0); 
    RETVAL = 1;
  OUTPUT:
    RETVAL


SV *
find_all(obj,textsv)
    HV *obj
    SV *textsv

  PREINIT:
    AV *r;	// return value
    STRLEN text_len, n;
    unsigned char *text;
    struct WuManber *wm;
    SV **svp;

    text = (unsigned char *)SvPV(textsv, text_len);
    // warn("find_all: text='%s', text_len=%d\n", text, (unsigned int)text_len);

  INIT:
    svp = hv_fetch(obj, "wm", 2, 0); 
    if (!svp) croak("find_all: no 'wm' in obj\n");
    wm = (struct WuManber *)SvPV(*svp, n);
    if (!svp) croak("find_all: sizeof(wm)=%d, expected %d\n", (int)n, (int)sizeof(struct WuManber));
    search_init(wm, "argv[0]");

    r = (AV *)sv_2mortal((SV *)newAV());
    wm->cb = push_result;
    wm->cb_data = (void *)r;

  CODE:
    search_text(wm, text, text_len);
    RETVAL = newRV((SV *)r);
  OUTPUT:
    RETVAL