#include "EXTERN.h"
#include "perl.h"
#include "XSUB.h"
#include "ppport.h"
#include <string.h>
/* Including the chm header */
#include <chm_lib.h>
#define CHM_MAX_BUF 65536
static char * my_strndup(const char *, size_t );
/*
* This struct will hold some necessary data for the extension and is
* the opaque C-object returned by new().
*
*/
typedef struct
{
struct chmFile* chmfile;
char* filename;
} ChmObj;
/*
* ChmObjData holds some infos about a particular member of the chm
* file. It will be used to build a linked list that contains infos
* about the content of the chm file. The member of the structure are:
*
* - path is the complete path of the member;
* - title is the title of the member if it is an html file;
* - size is the file-size of the member;
* - next is a pointer to the next structure .
*
*/
struct ChmObjData
{
char *path;
char *title;
size_t size;
struct ChmObjData *next;
};
/*
* We must(?) make this variable global, so it could be used in
* _chm_enumerate_callback().
*
*/
struct ChmObjData *data = NULL;
/*
* This function creates a new node for the linked list containing the
* filelist of the chm file.
*
*/
static struct ChmObjData *chm_data_add(char *path, char *title, size_t size)
{
struct ChmObjData *tmp;
tmp = calloc(1, sizeof(struct ChmObjData));
if ( NULL == tmp )
croak("Out of memory\n");
tmp->path = my_strndup(path, strlen(path));
tmp->title = title;
tmp->size = size;
tmp->next = NULL;
return tmp;
}
/*
* Init the ChmObject.
*
*/
static ChmObj *perl_chm_init(const char* filename)
{
ChmObj *self = NULL;
self = Newz(1, self, 1, ChmObj);
if ( NULL == self )
croak("Out of memory");
self->filename = calloc(strlen(filename) + (size_t)1, sizeof(char));
if ( !self->filename )
croak("Out of memory");
else
strncpy(self->filename, filename, strlen(filename));
self->chmfile = chm_open(self->filename);
if ( NULL == self->chmfile )
croak("Can't open file %s\n", self->filename);
return self;
}
/*
* Get rid of faulty strndup implementations...
*
*/
static char * my_strndup(const char *src, size_t len)
{
char *ret = NULL;
ret = calloc(len + (size_t)1, sizeof(char));
if ( !ret )
croak("Out of memory\n");
strncpy(ret, src, len);
*(ret + (int)len) = '\0';
return ret;
}
/*
* Given a valid "path", perl_get_object() fetch its content from
* chmfile, setting len to the length of the string that contains it
* (len is a value-result argument).
*
*/
static unsigned char *perl_get_object(struct chmFile *chmfile, const char *path, size_t *len)
{
struct chmUnitInfo ui;
unsigned char chm_buffer[CHM_MAX_BUF], *retbuf;
int offset, swath;
if ( CHM_RESOLVE_SUCCESS != chm_resolve_object(chmfile, path, &ui) )
croak("Can't resolve given path\n");
Newz(1, retbuf, (size_t)ui.length, unsigned char);
if ( NULL == retbuf )
croak("Out of memory\n");
swath = CHM_MAX_BUF;
offset = 0;
*len = (size_t)ui.length;
while ( offset < ui.length ) /* Let's read the file at swath bytes per time, */
{ /* incrementing offset with the data we've read */
if ( (ui.length - offset) < CHM_MAX_BUF )
swath = ui.length - offset;
else
swath = CHM_MAX_BUF;
swath = (int)chm_retrieve_object(chmfile, &ui, chm_buffer, offset, swath);
memcpy((retbuf + offset), chm_buffer, (size_t)swath);
offset += swath;
}
return retbuf;
}
static int file_is_normal(const char *filename)
{
/*
* For the moment, we are intrested only on normal files, such as
* html, css, images, etc... So we limit to look to these.
*
*/
if ( '/' == *filename ) /* SPECIAL or NORMAL file */
{
if ( ('#' == *(filename + 1)) || ('$' == *(filename + 1)) ) /* SPECIAL file */
return 0;
else /* NORMAL file */
return 1;
}
else /* META file */
return 0;
}
/*
* Check that the file at "path" is an html one, taking this decision
* depending on the extension of the file.
*
*/
static int file_is_html(const char *path)
{
char *tmp;
if ( (int)strlen(path) < 4 ) /* Path must be at least 5 char long. */
return 0;
if ( NULL != (tmp = strrchr(path, '.')) )
if ( 0 == strncasecmp(++tmp, "htm", (size_t)3) ||
0 == strncasecmp(tmp, "html", (size_t)4) )
return 1;
return 0;
}
/*
* Traverse the content of the string "s", which contains html code,
* to find the text between <title> and </title>, and return it.
*
*/
static char *find_title(char *s)
{
char *tmp = s;
size_t len;
while ( tmp++ )
{
tmp = strchr(tmp, '<');
if ( 0 == strncasecmp(tmp, "<title>", (size_t)7) )
{
tmp += 7;
len = (size_t) (strchr(tmp, '<') - tmp);
return my_strndup(tmp, len);
}
else
continue;
}
return NULL;
}
/*
* Glue-function for getting the title of an html file and returning
* it.
*
*/
static char *get_html_title(struct chmFile *chmfile, const char *filename)
{
char *content, *title;
size_t len;
content = perl_get_object(chmfile, filename, &len);
title = find_title(content);
return title;
}
/*
* Callback function to be passed to chm_enumerate(), in order to
* build a linked list of ChmObjData structures that contains some
* informations about the "normal" members (html, css, xml and image
* files) of the chm file.
*
*/
static int _chm_enumerate_callback(struct chmFile *h, struct chmUnitInfo *ui, void *context)
{
char *title;
struct ChmObjData *tmp;
if ( file_is_normal(ui->path) )
{
title = ((file_is_html(ui->path)) ? get_html_title(h, ui->path) : NULL);
tmp = chm_data_add(ui->path, title, ui->length);
data->next = tmp;
data = data->next;
tmp = NULL;
}
return CHM_ENUMERATOR_CONTINUE;
}
MODULE = Text::CHM PACKAGE = Text::CHM
ChmObj *
new( CLASS, file )
char * CLASS;
char * file;
CODE:
RETVAL = perl_chm_init(file);
if ( NULL == RETVAL )
XSRETURN_UNDEF;
OUTPUT:
RETVAL
void
DESTROY( self )
ChmObj * self;
CODE:
chm_close(self->chmfile);
safefree(self);
char*
filename( self )
ChmObj * self;
CODE:
RETVAL = self->filename;
OUTPUT:
RETVAL
void
close( self )
ChmObj * self;
CODE:
chm_close(self->chmfile);
void *
get_object( self, objname )
ChmObj * self;
char * objname;
INIT:
unsigned char * buf;
size_t len;
PPCODE:
buf = perl_get_object(self->chmfile, objname, &len);
XPUSHs(sv_2mortal(newSVpv(buf, len)));
void *
get_filelist( self )
ChmObj * self;
INIT:
struct ChmObjData *contents = NULL;
HV * hash;
PPCODE:
contents = chm_data_add("start", "start", 0);
data = contents;
if ( !chm_enumerate(self->chmfile, CHM_ENUMERATE_ALL, _chm_enumerate_callback, NULL ) )
croak("Errors getting filelist\n");
data = contents->next;
while ( data )
{
hash = newHV();
hv_store(hash, "path", 4, newSVpv(data->path, strlen(data->path)), 0);
hv_store(hash, "size", 4, newSViv(data->size), 0);
if ( data->title )
hv_store(hash, "title", 5, newSVpv(data->title, strlen(data->title)), 0);
else
hv_store(hash, "title", 5, newSV(0), 0);
XPUSHs(sv_2mortal(newRV((SV *)hash)));
data = data->next;
}