The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
/*
 * $Id: uni.c,v 2.0 2005/05/16 19:08:16 dankogai Exp $
 * (c) 1999-2003 Dan Kogai <dankogai@dan.co.jp>
 * This library is free software; you can redistribute it and/or
 * modify it under the same terms as Perl itself.
 */


#include <stdio.h>
#include <string.h>

#include "uni2euc.h"
#include "euc2uni.h"

/*
 * Short integer type aliases used by every converter in this file.
 * Each one is defined only when no earlier header has already supplied
 * a macro of the same name.
 * NOTE(review): presumably this lets Perl's own definitions take
 * precedence when the file is built as part of an XS module -- confirm.
 */
#ifndef U8
#define U8 unsigned char	/* one raw byte of input or output */
#endif
#ifndef U16
#define U16 unsigned short	/* holds a single UCS-2 code unit */
#endif
#ifndef U32
/* Used for lengths and counters.  `unsigned long` is at least 32 bits
 * wide; C does not promise it is exactly 32. */
#define U32 unsigned long
#endif

/*
 * _ucs2_euc -- convert UCS-2 text to EUC using the uni2euc lookup table.
 *
 * dst   : output buffer.  Receives the converted bytes followed by a
 *         terminating NUL, so it needs room for (return value + 1) bytes.
 * src   : input, two bytes per character, high byte first.
 * nchar : length of src in BYTES; a trailing odd byte is ignored.
 *
 * Returns the number of bytes stored in dst, not counting the NUL.
 *
 * The table is addressed as uni2euc[high byte] + low byte * 4, i.e. each
 * character owns a 4-byte slot holding its encoded bytes.  The copy below
 * stops at the first NUL in the slot and never looks past the slot's 4
 * bytes.  The terminator is written once, after the loop, so the output
 * is also a valid (empty) string when no character was converted.
 */
U32 _ucs2_euc(U8 *dst, U8 *src, U32 nchar){
    U32 result = 0;
    U32 len;
    const char *offset;
    for (nchar /= 2; nchar > 0; nchar--, src += 2){
        offset = uni2euc[src[0]] + src[1]*4;
        /* bounded copy: at most the 4 bytes that belong to this slot */
        for (len = 0; len < 4 && offset[len] != '\0'; len++){
            dst[len] = (U8)offset[len];
        }
        dst += len;
        result += len;
    }
    *dst = '\0';
    return result;
}

/*
 * Fallback code point (U+FFFD) that the converters below store whenever a
 * byte sequence is truncated or does not map to a table entry.
 */
# define FB_UNI 0xFFFd
/*
 * True when x is usable as an index into the 94x94 tables (j08_uni,
 * j12_uni).  The callers pass an unsigned `kuten`, so the `0 <=` half is
 * always true; a negative intermediate result wraps to a huge unsigned
 * value and is rejected by the upper bound instead.
 */
# define CHKUTEN(x) (0 <= (x) && (x) <  94*94) 

/*
 * Map a two-byte EUC row/cell pair to an index into a 94x94 table.
 * Both bytes must lie in 0xA1..0xFE; anything else returns -1.
 * Checking the bytes one by one matters: a range check on the combined
 * value alone lets an out-of-range cell byte alias an entry that belongs
 * to a neighbouring row.
 */
static long kuten_for_euc_ucs2(U8 row, U8 cell){
    if (row < 0xa1 || row > 0xfe || cell < 0xa1 || cell > 0xfe){
        return -1;
    }
    return (long)(row - 0xa1)*94 + (long)(cell - 0xa1);
}

/*
 * _euc_ucs2 -- convert a NUL-terminated EUC string to UCS-2.
 *
 * dst : output buffer; receives two bytes per decoded character, high
 *       byte first.  No terminator is written.
 * src : NUL-terminated EUC input.
 *
 * Returns the number of bytes stored in dst.
 *
 * Lead byte handling:
 *   <= 0x7F  ASCII, copied as is
 *   0x8E     one following byte, looked up in j01_uni
 *   0x8F     two following bytes, looked up in j12_uni
 *   other    lead + one following byte, looked up in j08_uni
 * A sequence cut short by the end of the string, or whose bytes are
 * outside 0xA1..0xFE, produces FB_UNI.  The number of input bytes
 * consumed per character does not depend on whether the lookup succeeds.
 */
U32 _euc_ucs2(U8 *dst, U8 *src){
    U32 result = 0;
    long kuten;
    U16 ucs2;
    for (result = 0; *src != '\0';  src++, dst += 2, result += 2){
        if (*src <= 0x7F){       /* ASCII */
            ucs2 = src[0];
        }else if (*src == 0x8e){ /* jisx0201 */
            if (src[1]){
                ucs2 = j01_uni[src[1]];
                src += 1;
            }else{
                ucs2 = FB_UNI;
            }
        }else if (*src == 0x8f){ /* jisx0212 */
            if (src[1] && src[2]){
                kuten = kuten_for_euc_ucs2(src[1], src[2]);
                ucs2 = (kuten >= 0) ? j12_uni[kuten] : FB_UNI;
                src += 2;
            }else{
                ucs2 = FB_UNI;
                if (src[1])
                    src++;       /* swallow the lone trailing byte too */
            }
        }else{                   /* jisx0208 */
            if (src[1]){
                kuten = kuten_for_euc_ucs2(src[0], src[1]);
                ucs2 = (kuten >= 0) ? j08_uni[kuten] : FB_UNI;
                src += 1;
            }else{
                ucs2 = FB_UNI;
            }
        }
        dst[0] = ucs2/256; dst[1] = ucs2%256;
    }
    return result;
}

/*
 * _ucs2_utf8 -- encode UCS-2 text as UTF-8.
 *
 * dst   : output buffer; receives the UTF-8 bytes followed by a NUL.
 * src   : input, two bytes per character, high byte first.
 * nchar : length of src in BYTES; a trailing odd byte is ignored.
 *
 * Returns the number of bytes stored in dst, not counting the NUL.
 * Each 16-bit value becomes one, two or three output bytes.
 */
U32 _ucs2_utf8(U8 *dst, U8 *src, U32 nchar){
    U8 *out = dst;
    U32 pairs = nchar / 2;
    U32 i;

    for (i = 0; i < pairs; i++){
        U32 cp = ((U32)src[2*i] << 8) | src[2*i + 1];

        if (cp >= 0x800){        /* 3 bytes */
            *out++ = (0xE0 | (cp >> 12));
            *out++ = (0x80 | ((cp >> 6) & 0x3F));
            *out++ = (0x80 | (cp & 0x3F));
        }else if (cp >= 0x80){   /* 2 bytes */
            *out++ = (0xC0 | (cp >> 6));
            *out++ = (0x80 | (cp & 0x3F));
        }else{                   /* 1 byte */
            *out++ = cp;
        }
    }
    *out = '\0';
    return (U32)(out - dst);
}

/*
 * _utf8_ucs2 -- decode a NUL-terminated UTF-8 string into UCS-2.
 *
 * dst : output buffer; receives two bytes per decoded character, high
 *       byte first.  No terminator is written.
 * src : NUL-terminated UTF-8 input.
 *
 * Returns the number of bytes stored in dst.
 *
 * Well-formed one-, two- and three-byte sequences are decoded to their
 * code point.  Everything else yields exactly one FB_UNI:
 *   - a continuation byte (0x80..0xBF) where a lead byte is expected,
 *   - a lead byte 0xF8..0xFF,
 *   - a sequence whose continuation bytes are missing or malformed (the
 *     offending byte is NOT consumed, so it is decoded on its own next),
 *   - a four-byte sequence, whose code point does not fit in 16 bits
 *     (the whole sequence is consumed).
 * Overlong encodings are not rejected.
 */
U32 _utf8_ucs2(U8 *dst, U8 *src){
    U32 ucs2;
    U32 need;    /* continuation bytes the lead byte calls for */
    U32 got;     /* continuation bytes actually consumed */
    U32 result = 0;

    while (*src != '\0'){
        U8 lead = *src++;

        if (lead < 0x80){          /* 1 byte */
            ucs2 = lead;
            need = 0;
        }else if (lead < 0xC0){    /* stray continuation byte */
            ucs2 = FB_UNI;
            need = 0;
        }else if (lead < 0xE0){    /* 2 bytes */
            ucs2 = lead & 0x1F;
            need = 1;
        }else if (lead < 0xF0){    /* 3 bytes */
            ucs2 = lead & 0x0F;
            need = 2;
        }else if (lead < 0xF8){    /* 4 bytes: beyond 16 bits */
            ucs2 = 0;
            need = 3;
        }else{                     /* never a valid lead byte */
            ucs2 = FB_UNI;
            need = 0;
        }

        /* The terminating NUL is not a continuation byte, so this loop
         * can never run past the end of the string. */
        for (got = 0; got < need; got++){
            if ((*src & 0xC0) != 0x80){
                break;
            }
            ucs2 = (ucs2 << 6) | (U32)(*src++ & 0x3F);
        }
        if (got < need || need == 3){
            ucs2 = FB_UNI;
        }

        *dst++ = (U8)((ucs2 & 0xff00) >> 8); /* 1st byte */
        *dst++ = (U8)(ucs2 & 0xff);          /* 2nd byte */
        result += 2;
    }
    return result;
}

/*
 * Map a two-byte EUC row/cell pair to an index into a 94x94 table.
 * Both bytes must lie in 0xA1..0xFE; anything else returns -1.
 * Checking the bytes one by one matters: a range check on the combined
 * value alone lets an out-of-range cell byte alias an entry that belongs
 * to a neighbouring row.
 */
static long kuten_for_euc_utf8(U8 row, U8 cell){
    if (row < 0xa1 || row > 0xfe || cell < 0xa1 || cell > 0xfe){
        return -1;
    }
    return (long)(row - 0xa1)*94 + (long)(cell - 0xa1);
}

/*
 * _euc_utf8 -- convert a NUL-terminated EUC string directly to UTF-8.
 *
 * dst : output buffer; receives the UTF-8 bytes followed by a NUL.
 * src : NUL-terminated EUC input.
 *
 * Returns the number of bytes stored in dst, not counting the NUL.
 *
 * Lead byte handling:
 *   <= 0x7F  ASCII, copied as is
 *   0x8E     one following byte, looked up in j01_uni
 *   0x8F     two following bytes, looked up in j12_uni
 *   other    lead + one following byte, looked up in j08_uni
 * A sequence cut short by the end of the string, or whose bytes are
 * outside 0xA1..0xFE, produces FB_UNI.  The number of input bytes
 * consumed per character does not depend on whether the lookup succeeds.
 * Each resulting 16-bit value is then written as 1 to 3 UTF-8 bytes.
 */
U32 _euc_utf8(U8 *dst, U8 *src){
    U32 result = 0;
    long kuten;
    U16 ucs2;
    for (result = 0; *src != '\0';  src++){
        if (*src <= 0x7F){       /* ASCII */
            ucs2 = src[0];
        }else if (*src == 0x8e){ /* jisx0201 */
            if (src[1]){
                ucs2 = j01_uni[src[1]];
                src += 1;
            }else{
                ucs2 = FB_UNI;
            }
        }else if (*src == 0x8f){ /* jisx0212 */
            if (src[1] && src[2]){
                kuten = kuten_for_euc_utf8(src[1], src[2]);
                ucs2 = (kuten >= 0) ? j12_uni[kuten] : FB_UNI;
                src += 2;
            }else{
                ucs2 = FB_UNI;
                if (src[1])
                    src++;       /* swallow the lone trailing byte too */
            }
        }else{                   /* jisx0208 */
            if (src[1]){
                kuten = kuten_for_euc_utf8(src[0], src[1]);
                ucs2 = (kuten >= 0) ? j08_uni[kuten] : FB_UNI;
                src += 1;
            }else{
                ucs2 = FB_UNI;
            }
        }
        if (ucs2 < 0x80){        /* 1 byte */
            *dst++ = ucs2;
            result += 1;
        }else if (ucs2 < 0x800){ /* 2 bytes */
            *dst++ = (0xC0 | (ucs2 >> 6));
            *dst++ = (0x80 | (ucs2 & 0x3F));
            result += 2;
        }else{                   /* 3 bytes */
            *dst++ = (0xE0 | (ucs2 >> 12));
            *dst++ = (0x80 | ((ucs2 >> 6) & 0x3F));
            *dst++ = (0x80 | (ucs2 & 0x3F));
            result += 3;
        }
    }
    *dst  = '\0';
    return result;
}

/*
 * _utf8_euc -- convert a NUL-terminated UTF-8 string directly to EUC.
 *
 * dst : output buffer.  Receives the converted bytes followed by a
 *       terminating NUL, so it needs room for (return value + 1) bytes.
 * src : NUL-terminated UTF-8 input.
 *
 * Returns the number of bytes stored in dst, not counting the NUL.
 *
 * Decoding: well-formed one-, two- and three-byte sequences give their
 * code point.  Everything else yields exactly one FB_UNI:
 *   - a continuation byte (0x80..0xBF) where a lead byte is expected,
 *   - a lead byte 0xF8..0xFF,
 *   - a sequence whose continuation bytes are missing or malformed (the
 *     offending byte is NOT consumed, so it is decoded on its own next),
 *   - a four-byte sequence, whose code point does not fit in 16 bits
 *     (the whole sequence is consumed).
 * Overlong encodings are not rejected.
 *
 * Encoding: the table is addressed as uni2euc[high byte] + low byte * 4,
 * i.e. each character owns a 4-byte slot.  The copy stops at the first
 * NUL in the slot and never looks past the slot's 4 bytes.
 */
U32 _utf8_euc(U8 *dst, U8 *src){
    U32 result = 0;
    U32 len;
    U32 ucs2;
    U32 need;    /* continuation bytes the lead byte calls for */
    U32 got;     /* continuation bytes actually consumed */
    const char *offset;

    while (*src != '\0'){
        U8 lead = *src++;

        if (lead < 0x80){          /* 1 byte */
            ucs2 = lead;
            need = 0;
        }else if (lead < 0xC0){    /* stray continuation byte */
            ucs2 = FB_UNI;
            need = 0;
        }else if (lead < 0xE0){    /* 2 bytes */
            ucs2 = lead & 0x1F;
            need = 1;
        }else if (lead < 0xF0){    /* 3 bytes */
            ucs2 = lead & 0x0F;
            need = 2;
        }else if (lead < 0xF8){    /* 4 bytes: beyond 16 bits */
            ucs2 = 0;
            need = 3;
        }else{                     /* never a valid lead byte */
            ucs2 = FB_UNI;
            need = 0;
        }

        /* The terminating NUL is not a continuation byte, so this loop
         * can never run past the end of the string. */
        for (got = 0; got < need; got++){
            if ((*src & 0xC0) != 0x80){
                break;
            }
            ucs2 = (ucs2 << 6) | (U32)(*src++ & 0x3F);
        }
        if (got < need || need == 3){
            ucs2 = FB_UNI;
        }

        /* ucs2 is at most 0xFFFF here, so the row index stays below 256 */
        offset = uni2euc[ucs2/256] + (ucs2%256)*4;
        for (len = 0; len < 4 && offset[len] != '\0'; len++){
            dst[len] = (U8)offset[len];
        }
        dst += len;
        result += len;
    }
    *dst = '\0';
    return result;
}

#ifndef PERL_XS

#include <sys/errno.h>

/*
 * Stand-alone filter, compiled only when PERL_XS is not defined.
 *
 * Reads the file named by argv[1], or stdin when no argument is given,
 * one chunk at a time, converts each chunk and writes it to stdout.
 * The direction is fixed at compile time: EUC -> UTF-8 when EUC_UTF8 is
 * defined, UTF-8 -> EUC otherwise.
 *
 * Returns 0 on success, or -1 if the input file cannot be opened.
 * Returning from main is equivalent to calling exit() with that value,
 * so no <stdlib.h> declaration is needed.
 *
 * NOTE(review): fgets() is limited to 256 bytes per call, so a line
 * longer than that is converted in pieces and a multi-byte character
 * may be split across two pieces.
 */
int main(int argc, char **argv){
    U8 buf1[1024], buf2[1024];
    FILE *IN;

    if (argc > 1){
        IN = fopen(argv[1], "r");
        if (IN == NULL){
            fprintf(stderr, "Can't open %s; %s\n", argv[1], strerror(errno));
            return -1;
        }
    }else{
        IN = stdin;
    }

    /* stdio works on char *, the converters on U8 *; hence the casts */
#ifdef EUC_UTF8

    while(fgets((char *)buf2, 256, IN)){
        (void)_euc_utf8(buf1, buf2);
        fputs((char *)buf1, stdout);
    }

#else

    while(fgets((char *)buf1, 256, IN)){
        (void)_utf8_euc(buf2, buf1);
        fputs((char *)buf2, stdout);
    }

#endif

    if (IN != stdin){
        fclose(IN);
    }
    return 0;
}

#endif