/* $Id: map8x.c,v 1.7 2001/12/31 18:42:50 gisle Exp $
*
* Copyright 1998, Gisle Aas.
*
* This library is free software; you can redistribute it and/or
* modify it under the same terms as Perl itself.
*/
#include "map8.h"
#include <memory.h>
#include <stdlib.h>
/*
 * Shared 256-entry second-level table in which every slot is NOCHAR.
 * map8_new() allocates it on first use and points every to_8[] entry of a
 * new map at it; map8_addpair() replaces an entry with a private table the
 * first time a pair lands in that block.  map8_free() releases it when the
 * last map goes away.
 */
static U16* nochar_map = 0;
/*
 * Number of maps currently alive: incremented by map8_new(), decremented by
 * map8_free().  When it drops to zero the shared nochar_map is freed.
 */
static int num_maps = 0;
Map8*
map8_new()
{
Map8* m;
int i;
m = (Map8*)malloc(sizeof(Map8));
if (!m) abort(); /* out of memory */
if (!nochar_map) {
/* initialize the shared array for second level u16 mapping */
nochar_map = (U16*)malloc(sizeof(U16)*256);
if (!nochar_map) abort(); /* out of memory */
for (i = 0; i < 256; i++)
nochar_map[i] = NOCHAR;
}
for (i = 0; i < 256; i++) {
m->to_16[i] = NOCHAR;
m->to_8[i] = nochar_map;
}
m->def_to8 = NOCHAR;
m->def_to16 = NOCHAR;
m->cb_to8 = 0;
m->cb_to16 = 0;
m->obj = 0;
num_maps++;
/* fprintf(stderr, "New %p (%d created)\n", m, num_maps); */
return m;
}
/*
 * map8_addpair - register the pair (u8 <-> u16) in map `m'.
 *
 * Existing entries win in both directions: a slot is only written while it
 * still holds NOCHAR.  The first pair that lands in a 16-bit block still
 * backed by the shared all-NOCHAR table gets a private 256-entry table.
 * The 8 -> 16 direction stores the value after htons().  Aborts on
 * allocation failure.
 */
void
map8_addpair(Map8* m, U8 u8, U16 u16)
{
    U8 row = u16 >> 8;
    U8 col = u16 & 0xFF;
    U16* block = m->to_8[row];

    if (block != nochar_map) {
        /* block already private: fill the slot only if it is still free */
        if (block[col] == NOCHAR)
            block[col] = u8;
    } else {
        /* first entry in this block: give the map its own table */
        int k = 0;
        U16* fresh = (U16*)malloc(256 * sizeof(U16));
        if (fresh == 0)
            abort(); /* out of memory */
        while (k < 256)
            fresh[k++] = NOCHAR;
        fresh[col] = u8;
        m->to_8[row] = fresh;
    }

    if (m->to_16[u8] == NOCHAR)
        m->to_16[u8] = htons(u16);
}
/*
 * map8_nostrict - add an identity pair (i <-> i) for every value 0..255
 * for which both map8_to_char8() and map8_to_char16() report NOCHAR.
 * A null map is ignored.
 */
void
map8_nostrict(Map8* m)
{
    int code;

    if (m == 0)
        return;

    for (code = 0; code < 256; code++) {
        /* only fill the gap when neither direction knows this value */
        if (map8_to_char8(m, code) == NOCHAR &&
            map8_to_char16(m, code) == NOCHAR)
        {
            map8_addpair(m, code, code);
        }
    }
}
/*
 * my_fgets - read one line from a PerlIO handle.
 *
 * Consumes characters up to and including the next '\n' (or up to EOF).
 * At most len-1 of them are stored in `buf'; the rest of an over-long line
 * is read and discarded.  `buf' is always NUL terminated.
 * Returns `buf', or 0 when nothing was stored.
 */
static char*
my_fgets(char* buf, int len, PerlIO* f)
{
    int used = 0;
    int c;

    for (;;) {
        c = PerlIO_getc(f);
        if (c == EOF)
            break;
        if (used < len - 1) {
            buf[used] = c;
            used++;
        }
        if (c == '\n')
            break;
    }
    buf[used] = '\0';

    if (used == 0)
        return 0;
    return buf;
}
Map8*
map8_new_txtfile(const char *file)
{
dTHX;
Map8* m;
int count = 0;
PerlIO* f;
char buf[512];
f = PerlIO_open(file, "r");
if (!f)
return 0;
m = map8_new();
while (my_fgets(buf, sizeof(buf), f)) {
char *c1 = buf;
char *c2;
long from;
long to;
from = strtol(buf, &c1, 0);
if (buf == c1 || from < 0 || from > 255)
continue; /* not a valid number */
to = strtol(c1, &c2, 0);
if (c1 == c2 || to < 0 || to > 0xFFFF)
continue; /* not a valid second number */
if (0 && from == to)
continue;
map8_addpair(m, from, to);
count++;
}
PerlIO_close(f);
if (!count) /* no mappings found */ {
map8_free(m);
return 0;
}
return m;
}
Map8*
map8_new_binfile(const char *file)
{
dTHX;
Map8* m;
int count = 0;
int n;
int i;
PerlIO* f;
struct map8_filerec pair[256];
f = PerlIO_open(file, "rb");
if (!f)
return 0;
if (PerlIO_read(f, pair, sizeof(pair[0])) != sizeof(pair[0]) ||
pair[0].u8 != htons(MAP8_BINFILE_MAGIC_HI) ||
pair[0].u16 != htons(MAP8_BINFILE_MAGIC_LO))
{
/* fprintf(stderr, "Bad magic\n"); */
PerlIO_close(f);
return 0;
}
m = map8_new();
while ( (n = PerlIO_read(f, pair, sizeof(pair))) > 0)
{
n /= sizeof(pair[0]);
for (i = 0; i < n; i++) {
U16 u8 = ntohs(pair[i].u8);
U16 u16 = ntohs(pair[i].u16);
if (u8 > 255) continue;
count++;
map8_addpair(m, (U8)u8, u16);
}
}
PerlIO_close(f);
if (!count) /* no mappings found */ {
map8_free(m);
return 0;
}
return m;
}
/*
 * map8_free - release a map created by map8_new().
 *
 * Frees every private second-level table (entries still pointing at the
 * shared all-NOCHAR table are left alone), then the map itself.  When the
 * last live map is released the shared table is freed as well.
 * A null pointer is ignored.
 */
void
map8_free(Map8* m)
{
    int blk = 0;

    if (m == 0)
        return;

    while (blk < 256) {
        U16* table = m->to_8[blk];
        if (table != nochar_map)
            free(table);
        blk++;
    }
    free(m);

    num_maps--;
    if (num_maps == 0) {
        /* nobody references the shared table any more */
        free(nochar_map);
        nochar_map = 0;
    }
    /* fprintf(stderr, "Freeing %p (%d left)\n", m, num_maps); */
}
#ifndef PERL
/*
 * map8_to_str16 - translate an 8-bit string into 16-bit characters.
 *
 *   m      the map to use
 *   str8   input; a null pointer makes the function return 0
 *   str16  output buffer, or 0 to have one of len+1 elements malloc'ed
 *   len    number of input characters, or negative to use strlen(str8)
 *   rlen   if non-null, receives the number of characters written
 *
 * For each input character the mapped value is used if there is one, else
 * m->def_to16 if that is not NOCHAR, else the first character returned by
 * the m->cb_to16 callback when it returns exactly one.  Characters with no
 * translation are dropped.  The output is terminated with 0x0000.
 * Returns the output buffer; aborts on allocation failure.
 */
U16* map8_to_str16(Map8* m, U8* str8, U16* str16, int len, int* rlen)
{
    U16* out;
    U8* in;

    if (!str8)
        return 0;
    if (len < 0)
        len = strlen((const char*)str8);
    if (!str16) {
        str16 = (U16*)malloc((len + 1) * sizeof(U16));
        if (!str16)
            abort();
    }

    out = str16;
    for (in = str8; len--; in++) {
        U16 mapped = map8_to_char16(m, *in);

        if (mapped != NOCHAR) {
            *out++ = mapped;
            continue;
        }
        if (m->def_to16 != NOCHAR) {
            *out++ = m->def_to16;
            continue;
        }
        if (m->cb_to16) {
            /* last resort: ask the user-supplied callback */
            STRLEN cb_len;
            U16* cb_buf = (m->cb_to16)(*in, m, &cb_len);

            if (cb_buf && cb_len > 0) {
                if (cb_len == 1)
                    *out++ = *cb_buf;
                else
                    fprintf(stderr, "one-to-many mapping not implemented yet\n");
            }
        }
    }
    *out = 0x0000; /* NUL16 terminate */

    if (rlen)
        *rlen = out - str16;
    return str16;
}
/*
 * map8_to_str8 - translate a 16-bit string into 8-bit characters.
 *
 *   m      the map to use
 *   str16  input; a null pointer makes the function return 0.  Each
 *          element is passed through ntohs() before lookup.
 *   str8   output buffer, or 0 to have one of len+1 bytes malloc'ed
 *   len    number of input characters, or negative to count them up to
 *          the terminating 0x0000 element
 *   rlen   if non-null, receives the number of bytes written
 *
 * For each input character the mapped value is used if there is one, else
 * m->def_to8 if that is not NOCHAR, else the first byte returned by the
 * m->cb_to8 callback when it returns exactly one.  Characters with no
 * translation are dropped.  The output is NUL terminated.
 * Returns the output buffer; aborts on allocation failure.
 */
U8* map8_to_str8(Map8* m, U16* str16, U8* str8, int len, int* rlen)
{
    U8* tmp8;

    if (str16 == 0)
        return 0;
    if (len < 0) {
        /*
         * Measure the INPUT, in 16-bit units.  The output buffer must not
         * be used here: it may be null, and its contents are unrelated
         * to the input length.
         */
        const U16* p = str16;
        len = 0;
        while (*p++)
            len++;
    }
    if (str8 == 0) {
        str8 = (U8*)malloc(sizeof(U8)*(len+1));
        if (!str8) abort();
    }

    tmp8 = str8;
    while (len--) {
        U16 c = map8_to_char8(m, ntohs(*str16));
        if (c != NOCHAR && c <= 0xFF) {
            *tmp8++ = (U8)c;
        } else if (m->def_to8 != NOCHAR) {
            *tmp8++ = (U8)m->def_to8;
        } else if (m->cb_to8) {
            /* last resort: ask the user-supplied callback */
            U8* buf;
            STRLEN cb_len;
            buf = (m->cb_to8)(ntohs(*str16), m, &cb_len);
            if (buf && cb_len > 0) {
                if (cb_len == 1) {
                    *tmp8++ = *buf;
                } else {
                    fprintf(stderr, "one-to-many mapping not implemented yet\n");
                }
            }
        }
        str16++;
    }
    *tmp8 = '\0'; /* NUL terminate */

    if (rlen) {
        *rlen = tmp8 - str8;
    }
    return str8;
}
#endif /* !PERL */
/*
 * map8_recode8 - recode an 8-bit string from the encoding described by
 * `m1' to the encoding described by `m2', going through 16-bit characters.
 *
 *   from  input; a null pointer makes the function return 0
 *   to    output buffer, or 0 to have one of len+1 bytes malloc'ed
 *   len   number of input bytes, or negative to use strlen(from)
 *   rlen  if non-null, receives the number of bytes written
 *
 * Step 1 (m1): mapped value, else m1->def_to16, else a single character
 * from m1->cb_to16.  Step 2 (m2): mapped value, else m2->def_to8, else a
 * single byte from m2->cb_to8.  A character for which either step yields
 * nothing is dropped.  A warning is printed once per call if the first
 * callback returns more than one character.  The output is NUL terminated.
 * Returns the output buffer; aborts on allocation failure.
 */
U8* map8_recode8(Map8* m1, Map8* m2, U8* from, U8* to, int len, int* rlen)
{
    dTHX;
    U8* tmp;
    U16 u8; /* need U16 to represent NOCHAR */
    int didwarn = 0;

    if (from == 0)
        return 0;
    if (len < 0) {
        len = strlen((const char*)from);
    }
    if (to == 0) {
        to = (U8*)malloc(sizeof(U8)*(len+1));
        if (!to) abort();
    }

    tmp = to;
    while (len--) {
        /*
         * First translate to the common 16-bit representation.  `uc' is
         * kept in the byte order map8_to_char16() returns: callback results
         * are passed through htons() to match, and ntohs() is applied
         * before the value is looked up in m2.
         */
        U16 uc = map8_to_char16(m1, *from);
        if (uc != NOCHAR)
            goto got_16;
        if (m1->def_to16 != NOCHAR) {
            uc = m1->def_to16;
            goto got_16;
        }
        if (m1->cb_to16) {
            U16 *buf;
            STRLEN cb_len = 0; /* defined even if the callback leaves it alone */
            buf = (m1->cb_to16)(*from, m1, &cb_len);
            if (buf && cb_len == 1) {
                uc = htons(*buf);
                goto got_16;
            }
            if (cb_len > 1 && !didwarn++)
                PerlIO_printf(PerlIO_stderr(), "one-to-many mapping not implemented yet\n");
        }
        /* Never managed to find a mapping to Unicode, skip it */
        from++;
        continue;

    got_16:
        from++; /* 'uc' char translated now */

        /* Then map 'uc' back to the second 8-bit encoding */
        u8 = map8_to_char8(m2, ntohs(uc));
        if (u8 == NOCHAR || u8 > 0xFF) {
            if (m2->def_to8 != NOCHAR)
                u8 = m2->def_to8;
            else if (m2->cb_to8) {
                U8* buf;
                STRLEN cb_len = 0;
                buf = (m2->cb_to8)(ntohs(uc), m2, &cb_len);
                if (!buf || cb_len != 1)
                    continue; /* no mapping exists for this char */
                /* use the byte the callback produced, not the stale value */
                u8 = *buf;
            }
            else
                continue;
        }
        *tmp++ = (U8)u8;
    }
    *tmp = '\0'; /* NUL terminate */

    if (rlen) {
        *rlen = tmp - to;
    }
    return to;
}
/*
 * map8_empty_block - report whether a 256-character block of the 16-bit
 * space has no private table in `m', i.e. its entry still points at the
 * shared all-NOCHAR table.  Returns 1 if so, 0 otherwise.
 */
int map8_empty_block(Map8* m, U8 block)
{
    U16* second_level = m->to_8[block];

    if (second_level == nochar_map)
        return 1;
    return 0;
}
#ifdef MAP8_DEBUGGING
/*
 * map8_print - debugging aid: dump map `m' on standard output by handing
 * it to map8_fprint().
 */
void
map8_print(Map8* m)
{
    FILE* out = stdout;

    map8_fprint(m, out);
}
/*
 * map8_fprint - debugging aid: write a human-readable dump of `m' to `f'.
 *
 * Lists every non-identity 8 -> 16 pair, then every private 16 -> 8 block
 * with its non-identity pairs, with counts of identity and unmapped
 * entries, and finally the number of bytes the map occupies.  A null map
 * prints "NULL mapping".
 *
 * NOTE(review): this function reads m->nomap8 and m->nomap16, while the
 * rest of this file only touches cb_to8 / cb_to16.  Confirm that Map8
 * still declares those members before enabling MAP8_DEBUGGING.
 */
void
map8_fprint(Map8* m, FILE* f)
{
    int i, j;
    long size = 0;
    int num_ident = 0;
    int num_nomap = 0;

    if (!m) {
        fprintf(f, "NULL mapping\n");
        return;
    }
    size += sizeof(Map8);
    fprintf(f, "MAP8 %p\n", (void*)m);
    fprintf(f, " U8-U16\n");
    for (i = 0; i < 256; i++) {
        U16 raw = m->to_16[i];
        if (raw == NOCHAR) {
            num_nomap++;
        } else {
            /* map8_addpair() stores to_16[] via htons(); undo that here */
            U16 u = ntohs(raw);
            if (i == u) {
                num_ident++;
            } else {
                fprintf(f, " %02x U+%04x (%d --> %d)\n", i, u, i, u);
            }
        }
    }
    if (num_ident)
        fprintf(f, " +%d identity mappings\n", num_ident);
    if (num_nomap) {
        fprintf(f, " +%d nochar mappings", num_nomap);
        if (m->nomap8)
            fprintf(f, " (mapping func %p)", m->nomap8);
        fprintf(f, "\n");
    }

    for (i = 0; i < 256; i++) {
        num_ident = 0;
        num_nomap = 0;
        if (m->to_8[i] == 0) {
            fprintf(f, " U16-U8: block %d NULL (should not happen)\n", i);
        } else if (m->to_8[i] != nochar_map) {
            size += sizeof(U16)*256;
            fprintf(f, " U16-U8: block %d %p\n", i, (void*)m->to_8[i]);
            for (j = 0; j < 256; j++) {
                int from = i*256+j;
                int to = m->to_8[i][j];
                if (from == to) {
                    num_ident++;
                } else if (to == NOCHAR) {
                    num_nomap++;
                    /* fprintf(f, " NOMAP %d\n", from); */
                } else {
                    fprintf(f, " U+%04x %02x (%d --> %d)\n", from, to, from, to);
                }
            }
            if (num_ident)
                fprintf(f, " +%d identity mappings\n", num_ident);
            if (num_nomap)
                fprintf(f, " +%d nochar mappings\n", num_nomap);
        }
    }
    if (m->nomap16)
        fprintf(f, " U16-U8: nochar mapping func %p\n", m->nomap16);
    /* size is a long, so it needs %ld rather than %d */
    fprintf(f, " (%ld bytes allocated)\n", size);
}
#endif