#include "EXTERN.h"
#include "perl.h"
#include "XSUB.h"
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <string.h>
#include <unistd.h>
#include <sys/mman.h>
/* header */
/* the format could easily be improved by eliminating the next field
* if we could sort by hash value before writing the file. well, that
* should have been done in perl in the first place, but this implementation
* uses very little memory, and our .dpo files might get LARGE. */
/* the next version might instead use the cdb format
* (http://cr.yp.to/cdb/cdb.txt), but that is not certain (it might
* be worse).
*/
/* format version stored in dpo_head.version; the table reader croaks on any other value */
#define DPO_VERSION 1
/* bucket count; the DPO_Writer constructor's default argument repeats this value as a literal */
#define HASHSIZE 997
#define MAXHASH 40 /* the number of bytes to hash at most */
/* byte offset relative to the start of the .dpo file; 0 ends a bucket chain */
typedef U32 OFS;
/* result type of hash () */
typedef unsigned int HASH;
/* magic number stored in dpo_head.sig */
#define DPO_SIG 0x5000504f
/* DPO_SIG with its four bytes reversed; seeing it means the file was written with the other byte order */
#define DPO_SIG_SWAP 0x4f500050
/* dpo file header */
/* we assume natural alignment is ok */
/* The header sits at file offset 0 and is written in host byte order. */
struct dpo_head {
U32 sig; /* DPO_SIG */
U32 version; /* DPO_VERSION */
U32 hashsize; /* number of entries in hash[] */
OFS hash[1]; /* first of hashsize bucket heads: offset of the first dpo_str in the bucket, 0 if empty */
/* followed by aligned dpo_str's */
};
/* Size in bytes of a dpo_head carrying hashsize buckets. One bucket is already
 * part of sizeof (struct dpo_head), hence the "- 1". hashsize must be >= 1. */
#define SIZEOF_HDR(hashsize) (sizeof (struct dpo_head) + sizeof (OFS) * ((hashsize)-1))
/* each string has the following format */
/* (one record per msgid/msgstr pair; records start at offsets that are a multiple of sizeof (OFS)) */
struct dpo_str {
OFS next; /* inefficient, should be optimized by sorting buckets */
OFS len1; /* length in bytes of the msgid that follows this struct */
OFS len2; /* length in bytes of the msgstr that follows the msgid */
/* followed by two unterminated, unaligned strings */
};
/* Hash a message id.
 *
 * The accumulator is seeded with the full length of the message, but only
 * the first MAXHASH bytes are mixed in. The function is ad hoc and was
 * never tuned; it is cheap, and a sparsely filled (oversized) hash table
 * only costs disk space.
 *
 * The result is stored implicitly in every .dpo file (it selects the
 * bucket), so the values it produces must never change.
 */
static HASH
hash (const unsigned char *msg, unsigned int len)
{
  HASH acc = len;
  unsigned int count = len > MAXHASH ? MAXHASH : len;
  const unsigned char *end = msg + count;

  for (; msg != end; msg++)
    acc ^= (acc << 4) + (acc >> 25) + (HASH)*msg;

  return acc;
}
/* State of a .dpo file that is being written (PApp::I18n::DPO_Writer objects). */
typedef struct dpo_writer {
int fd; /* descriptor of the output file */
OFS ofs; /* file offset just past the last byte written; starts at SIZEOF_HDR (hashsize) */
struct dpo_head *hdr; /* header incl. bucket table, kept in memory and written to offset 0 by DESTROY */
} *PApp__I18n__DPO_Writer;
/* A translation table opened for lookups (PApp::I18n::Table objects). */
typedef struct dpo_table {
void *start; /* base address of the mmap'ed .dpo file, or 0 if the table was created without a path */
size_t length; /* size in bytes of the mapping at start */
SV *lang; /* copy of the lang argument given to the constructor */
} *PApp__I18n__Table;
/* skip any leading \{} or \{\ tag */
/* s is a pointer lvalue to the message, l an integer lvalue holding its
 * length in bytes; both are updated in place:
 *   "\{\..."      -> the leading "\{" is dropped, so the text starts with "\"
 *   "\{tag}rest"  -> everything up to and including the first "}" is dropped
 * A "\{" that is never closed by "}" within the l bytes is not a tag, and
 * s and l are left untouched. The scan never reads beyond s[l-1].
 */
#define SKIP_META(s,l) \
do { \
  if ((l) >= 3 && (s)[0] == '\\' && (s)[1] == '{') \
    { \
      if ((s)[2] == '\\') \
        { \
          /* "\{\" escape: keep the second backslash */ \
          (s) += 2; \
          (l) -= 2; \
        } \
      else \
        { \
          /* look for the "}" closing the tag */ \
          size_t skip_ = 2; \
          while (skip_ < (size_t)(l) && (s)[skip_] != '}') \
            skip_++; \
          if (skip_ < (size_t)(l)) \
            { \
              /* drop the tag including its closing brace */ \
              (s) += skip_ + 1; \
              (l) -= skip_ + 1; \
            } \
        } \
    } \
} while (0)
MODULE = PApp::I18n PACKAGE = PApp::I18n::DPO_Writer
PROTOTYPES: ENABLE
PApp::I18n::DPO_Writer
new(class, path, hashsize = 997)
        SV *    class
        char *  path
        int     hashsize
        CODE:
        /* Create (or truncate) the file at path and set up the writer state.
         * hashsize is the number of hash buckets and must be at least 1.
         * Only the in-memory header is prepared here; space for it is
         * reserved by starting the record offset at SIZEOF_HDR (hashsize),
         * and the header itself is written to the file by DESTROY.
         * Croaks if hashsize is invalid or the file cannot be created. */
        {
          int fd;
          void *data;
          /* a bucket count below 1 would break SIZEOF_HDR and the modulo in add */
          if (hashsize < 1)
            croak ("DPO_Writer: invalid hash size %d", hashsize);
          fd = creat (path, 0666);
          /* creat returns -1 on failure; 0 is a perfectly valid descriptor */
          if (fd < 0)
            croak ("DPO_Writer: unable to create '%s': %s", path, strerror (errno));
          Newz(0, RETVAL, 1, struct dpo_writer);
          Newz(0, data, SIZEOF_HDR (hashsize), char);
          RETVAL->hdr = (struct dpo_head *) data;
          RETVAL->fd = fd;
          RETVAL->ofs = SIZEOF_HDR (hashsize);
          RETVAL->hdr->sig = DPO_SIG;
          RETVAL->hdr->version = DPO_VERSION;
          RETVAL->hdr->hashsize = hashsize;
        }
        OUTPUT:
        RETVAL
void
add(self, msgid, msgstr)
        PApp::I18n::DPO_Writer self
        SV *    msgid
        SV *    msgstr
        CODE:
        /* Append one msgid/msgstr pair to the file and put it at the head
         * of its hash bucket. Both strings are upgraded to UTF-8 in place.
         * A leading meta tag is stripped from msgid before it is hashed
         * and stored, exactly as gettext does before it hashes and
         * compares, so that both sides agree on the bucket.
         * Croaks if the record cannot be written completely. */
        sv_utf8_upgrade (msgid);
        sv_utf8_upgrade (msgstr);
        {
          STRLEN len1, len2;
          unsigned char *xmsgid  = (unsigned char *) SvPV (msgid , len1);
          unsigned char *xmsgstr = (unsigned char *) SvPV (msgstr, len2);
          HASH hval;
          struct dpo_str str;
          OFS ofs;
          SKIP_META (xmsgid, len1);
          /* hash the stripped msgid: this is what the reader hashes */
          hval = hash (xmsgid, len1) % self->hdr->hashsize;
          ofs = self->ofs;
          /* align to sizeof OFS */
          ofs += sizeof (OFS) - 1;
          ofs -= ofs % sizeof (OFS);
          str.len1 = len1;
          str.len2 = len2;
          str.next = self->hdr->hash[hval];
          if (lseek (self->fd, ofs, SEEK_SET) == (off_t) -1
              || write (self->fd, &str, sizeof (struct dpo_str)) != (ssize_t) sizeof (struct dpo_str)
              || write (self->fd, xmsgid , len1) != (ssize_t) len1
              || write (self->fd, xmsgstr, len2) != (ssize_t) len2)
            croak ("DPO_Writer: unable to write translation table entry: %s", strerror (errno));
          /* link the record into its bucket only once it is fully on file */
          self->hdr->hash[hval] = ofs;
          self->ofs = ofs + sizeof (struct dpo_str) + len1 + len2;
        }
void
DESTROY(self)
        PApp::I18n::DPO_Writer self
        CODE:
        /* Finish the file: write the header (with the bucket table that
         * add filled in) to offset 0, close the descriptor and release the
         * writer. Without the header the file is not a valid table, so
         * a failure is reported with a warning; a destructor cannot
         * usefully croak. */
        {
          size_t hdrsize = SIZEOF_HDR (self->hdr->hashsize);
          if (lseek (self->fd, 0, SEEK_SET) == (off_t) -1
              || write (self->fd, self->hdr, hdrsize) != (ssize_t) hdrsize)
            warn ("DPO_Writer: unable to write translation table header: %s", strerror (errno));
          if (close (self->fd) < 0)
            warn ("DPO_Writer: unable to close translation table: %s", strerror (errno));
          Safefree (self->hdr);
          Safefree (self);
        }
MODULE = PApp::I18n PACKAGE = PApp::I18n::Table
PApp::I18n::Table
new(class, path = 0, lang = &PL_sv_undef)
        SV *    class
        char *  path
        SV *    lang
        CODE:
        /* Open a translation table. With a non-empty path the .dpo file is
         * mapped read-only and its header is validated (signature,
         * version, and that the bucket table fits inside the file).
         * Without a path an empty table is created that translates
         * nothing. lang is copied into the table object.
         * Croaks on any failure; the descriptor and the mapping are
         * released first so nothing leaks. */
        {
          void *start = 0;
          size_t length = 0;
          if (path && *path)
            {
              struct dpo_head *hdr;
              U32 sig, version, hashsize;
              off_t size;
              int saved_errno;
              int fd = open (path, O_RDONLY);
              /* open returns -1 on failure; 0 is a valid descriptor */
              if (fd < 0)
                croak ("unable to open translation table '%s': %s", path, strerror (errno));
              size = lseek (fd, 0, SEEK_END);
              if (size < 0)
                {
                  saved_errno = errno;
                  close (fd);
                  croak ("unable to open translation table '%s': %s", path, strerror (saved_errno));
                }
              if (size < (off_t) sizeof (struct dpo_head))
                {
                  close (fd);
                  croak ("%s: translation table too short to be valid", path);
                }
              length = (size_t) size;
              start = mmap (0, length, PROT_READ, MAP_SHARED, fd, 0);
              /* close may clobber errno, keep mmap's value for the message */
              saved_errno = errno;
              close (fd);
              if (start == MAP_FAILED)
                croak ("unable to mmap translation table '%s': %s", path, strerror (saved_errno));
              hdr = (struct dpo_head *)start;
              /* copy the fields out, the mapping is gone by the time we croak */
              sig = hdr->sig;
              version = hdr->version;
              hashsize = hdr->hashsize;
              if (sig != DPO_SIG
                  || version != DPO_VERSION
                  || hashsize == 0
                  || hashsize - 1 > (length - sizeof (struct dpo_head)) / sizeof (OFS))
                {
                  munmap (start, length);
                  if (sig == DPO_SIG_SWAP)
                    croak ("%s: invalid translation table (probably byteswapped)", path);
                  if (sig != DPO_SIG)
                    croak ("%s: invalid translation table", path);
                  if (version != DPO_VERSION)
                    croak ("%s: unsupported translation table version (%u)", path, (unsigned int) version);
                  /* bucket table is empty or extends beyond the end of the file */
                  croak ("%s: invalid translation table", path);
                }
            }
          Newz(0, RETVAL, 1, struct dpo_table);
          RETVAL->start = start;
          RETVAL->length = length;
          RETVAL->lang = newSVsv (lang);
        }
        OUTPUT:
        RETVAL
SV *
lang(self)
PApp::I18n::Table self
CODE:
RETVAL = SvREFCNT_inc (self->lang);
OUTPUT:
RETVAL
void
DESTROY(self)
        PApp::I18n::Table self
        CODE:
        /* Tear the table down: drop the file mapping if there is one, give
         * up our reference to the language SV, then free the struct. */
        {
          struct dpo_table *table = self;
          if (table->start != 0)
            {
              munmap (table->start, table->length);
            }
          if (table->lang != 0)
            {
              SvREFCNT_dec (table->lang);
            }
          Safefree (self);
        }
SV *
gettext(self, msgid)
        PApp::I18n::Table self
        SV *    msgid
        CODE:
        /* Translate msgid. The message is upgraded to UTF-8 in place, any
         * leading meta tag is skipped, and the remainder is looked up in
         * the mapped table. Returns a new UTF-8 flagged SV holding the
         * translation or, if there is none (or no table is mapped), the
         * message itself without its meta tag.
         * All offsets and lengths read from the file are checked against
         * the size of the mapping; a damaged chain ends the search
         * instead of reading outside the mapping. */
        {
          STRLEN len;
          char *xmsgid;
          if (!SvUTF8 (msgid)) /* optimization */
            sv_utf8_upgrade (msgid);
          xmsgid = SvPV (msgid, len);
          SKIP_META (xmsgid, len);
          if (self->start && self->length >= sizeof (struct dpo_head))
            {
              struct dpo_head *hdr = (struct dpo_head *)self->start;
              /* number of buckets that fit behind the one inside dpo_head */
              size_t extra = (self->length - sizeof (struct dpo_head)) / sizeof (OFS);
              /* hashsize 0 would divide by zero, a too large one would index past the file */
              if (hdr->hashsize && hdr->hashsize - 1 <= extra)
                {
                  HASH hval = hash ((const unsigned char *)xmsgid, len) % hdr->hashsize;
                  OFS ofs = hdr->hash[hval];
                  while (ofs)
                    {
                      struct dpo_str *str;
                      size_t room;
                      /* the record header must be aligned and inside the mapping */
                      if (ofs % sizeof (OFS)
                          || ofs >= self->length
                          || self->length - ofs < sizeof (struct dpo_str))
                        break;
                      str = (struct dpo_str *)(((char *)self->start) + ofs);
                      /* ... and so must both strings that follow it */
                      room = self->length - ofs - sizeof (struct dpo_str);
                      if (str->len1 > room || str->len2 > room - str->len1)
                        break;
                      if (str->len1 == len && memcmp (str + 1, xmsgid, len) == 0)
                        {
                          RETVAL = newSVpvn (((char *)(str + 1)) + len, str->len2);
                          goto found;
                        }
                      ofs = str->next;
                    }
                }
            }
          /* default: return "original" string */
          RETVAL = newSVpvn (xmsgid, len);
          found:
          SvUTF8_on (RETVAL);
        }
        OUTPUT:
        RETVAL