The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
/* -*- Mode: c; c-basic-offset: 2 -*-
 *
 * raptor_nfc_test.c - Raptor Unicode NFC validation check
 *
 * Copyright (C) 2004-2006, David Beckett http://purl.org/net/dajobe/
 * Copyright (C) 2004-2004, University of Bristol, UK http://www.bristol.ac.uk/
 * 
 * This package is Free Software and part of Redland http://librdf.org/
 * 
 * It is licensed under the following three licenses as alternatives:
 *   1. GNU Lesser General Public License (LGPL) V2.1 or any newer version
 *   2. GNU General Public License (GPL) V2 or any newer version
 *   3. Apache License, V2.0 or any newer version
 * 
 * You may not use this file except in compliance with at least one of
 * the above three licenses.
 * 
 * See LICENSE.html or LICENSE.txt at the top of this package for the
 * complete terms and further detail along with the license texts for
 * the licenses in COPYING.LIB, COPYING and LICENSE-2.0.txt respectively.
 * 
 * 
 */


#ifdef HAVE_CONFIG_H
#include <raptor_config.h>
#endif

#ifdef WIN32
#include <win32_raptor_config.h>
#endif

#include <stdio.h>
#include <stdlib.h> /* for abort(), strtoul() */
#include <string.h>
#include <ctype.h> /* for isprint() */
#include <stdarg.h>
#ifdef HAVE_ERRNO_H
#include <errno.h>
#endif

/* Raptor includes */
#include "raptor.h"
#include "raptor_internal.h"
#include "raptor_nfc.h"


#undef RAPTOR_NFC_DECODE_DEBUG


/*
 * decode_to_utf8:
 * @utf8_string: destination buffer for the UTF-8 output
 * @utf8_string_length: size of @utf8_string in bytes
 * @unicode_string: first char of a run of space-separated hexadecimal numbers
 * @end: one past the last char of the run; never decoded itself
 *
 * Parse each hexadecimal number found in [@unicode_string, @end) as a
 * Unicode code point and append its encoding, as produced by
 * raptor_unicode_char_to_utf8(), to @utf8_string.
 *
 * strtoul() is not bounded by @end, so the char at @end must be one it
 * will not consume as part of a number (the caller in this file passes
 * a pointer to the ';' that terminates the column).
 *
 * Decoding stops with a diagnostic at the first token that is not a
 * hexadecimal number.
 *
 * The buffer size is checked after each character has been written, so
 * an overrun is detected rather than prevented; the program then
 * aborts.
 *
 * Return value: number of bytes written to @utf8_string
 */
static int
decode_to_utf8(unsigned char *utf8_string, size_t utf8_string_length,
               const char *unicode_string, const char *end)
{
  unsigned char *u=utf8_string;
  const char *p=unicode_string;
  
#ifdef RAPTOR_NFC_DECODE_DEBUG
  fputs("decode_to_utf8: string '", stderr);
  (void)fwrite(unicode_string, sizeof(char), (size_t)(end-unicode_string),
               stderr);
  fputs("' converts to:\n  ", stderr);
#endif

  while(p < end) {
    unsigned long c;
    char *endptr=NULL;

    if(*p == ' ') {
      p++;
      continue;
    }
    
    c=strtoul(p, &endptr, 16);

    /* No digits consumed: without this check p would never advance and
     * the loop would emit U+0000 until the buffer overflowed.
     */
    if(endptr == p) {
      fprintf(stderr, "decode_to_utf8: expected a hex number at '%.*s'\n",
              (int)(end-p), p);
      break;
    }

#ifdef RAPTOR_NFC_DECODE_DEBUG
    fprintf(stderr, "U+%04lX ", c);
#endif

    p=endptr;
    
    u+= raptor_unicode_char_to_utf8(c, u);
    
    if((size_t)(u-utf8_string) > utf8_string_length) {
      fprintf(stderr,
              "decode_to_utf8 overwote utf8_string buffer at byte %d\n",
              (int)(u-utf8_string));
      abort();
    }
  }

#ifdef RAPTOR_NFC_DECODE_DEBUG
  fputs("\n", stderr);
#endif

  return (int)(u-utf8_string);
}



/*
 * utf8_print:
 * @input: UTF-8 encoded bytes
 * @length: number of bytes available at @input
 * @stream: output stream
 *
 * Write the characters decoded from @input to @stream as "U+XXXX"
 * tokens separated by single spaces.  Printing ends when @length bytes
 * have been consumed, when a zero byte is reached, or when
 * raptor_utf8_to_unicode_char() returns a size that is not positive.
 */
static void
utf8_print(const unsigned char *input, int length, FILE *stream)
{
  const unsigned char *cursor=input;
  int offset;

  for(offset=0; offset < length && *cursor; ) {
    unsigned long codepoint;
    int used;

    used=raptor_utf8_to_unicode_char(&codepoint, cursor, length-offset);
    if(used <= 0)
      break;

    /* separator goes before every token except the first */
    if(offset != 0)
      fputc(' ', stream);
    fprintf(stream, "U+%04X", (int)codepoint);

    cursor += used;
    offset += used;
  }
}


/*
 * main:
 *
 * Read "NormalizationTest.txt" from the current directory and, for
 * every data line, decode columns 2 and 4 to UTF-8 and run
 * raptor_utf8_check() and raptor_nfc_check() on them.  Column 4 is only
 * checked when its bytes differ from column 2.  Lines starting with
 * '@' or '#' and blank lines are skipped.
 *
 * Pass/fail counts and the longest decoded column lengths are printed
 * to stderr at the end.
 *
 * Return value: 1 if the file cannot be opened or read, or a data line
 * has fewer than four ';'-terminated columns; 0 otherwise.  Failed
 * checks are counted and reported but do not change the exit status.
 */
int
main (int argc, char *argv[]) 
{
  const char *program=raptor_basename(argv[0]);
  static const char *filename="NormalizationTest.txt";
  FILE *fh;
  int rc=0;
  unsigned int line=1;
  size_t max_c2_len=0;
  size_t max_c4_len=0;
  int passes=0;
  int fails=0;

  fh=fopen(filename, "r");
  if(!fh) {
    fprintf(stderr, "%s: file '%s' open failed - %s\n",
            program, filename, strerror(errno));
    return 1;
  }

#define LINE_BUFFER_SIZE 1024

/* FIXME big enough for Unicode 4 (c2 max 16; c4 max 33) */
#define UNISTR_SIZE 40

  for(;!feof(fh); line++) {
    char buffer[LINE_BUFFER_SIZE];
    char *p;
    char *column_end[4]; /* column_end[n] is the ';' closing column n+1 */
    int column;
    unsigned char nfc1[UNISTR_SIZE];
    unsigned char nfc2[UNISTR_SIZE];
    size_t nfc1_len, nfc2_len;
    int nfc_rc;
    int error;
    
    p=fgets(buffer, LINE_BUFFER_SIZE, fh);
    if(!p) {
      if(ferror(fh)) {
        fprintf(stderr, "%s: file '%s' read failed - %s\n",
                program, filename, strerror(errno));
        rc=1;
        break;
      }
      /* assume feof */
      break;
    }

#if 0
    fprintf(stderr, "%s:%u: line '%s'\n", program, line, buffer);
#endif

    /* skip directive, comment and blank lines */
    if(*p == '@' || *p == '#' || *p == '\n' || *p == '\r' || !*p)
      continue;
    
    /* Locate the ';' ending each of the first four columns.  strchr()
     * stops at the terminating NUL so a short or malformed line cannot
     * send the scan past the end of the string.
     */
    for(column=0; column < 4; column++) {
      column_end[column]=strchr(p, ';');
      if(!column_end[column])
        break;
      p=column_end[column]+1;
    }
    if(column < 4) {
      fprintf(stderr,
              "%s:%u: malformed line - fewer than 4 ';'-terminated columns\n",
              filename, line);
      rc=1;
      continue;
    }

    /* column 2 lies between the first and second ';' */
    nfc1_len=decode_to_utf8(nfc1, UNISTR_SIZE,
                            column_end[0]+1, column_end[1]);
    if(nfc1_len > max_c2_len)
      max_c2_len=nfc1_len;
    
    /* column 4 lies between the third and fourth ';' */
    nfc2_len=decode_to_utf8(nfc2, UNISTR_SIZE,
                            column_end[2]+1, column_end[3]);
    if(nfc2_len > max_c4_len)
      max_c4_len=nfc2_len;

    if(!raptor_utf8_check(nfc1, nfc1_len)) {
      fprintf(stderr, "%s:%u: UTF8 check 1 failed on: '", filename, line);
      utf8_print(nfc1, nfc1_len, stderr);
      fputs("'\n", stderr);
      fails++;
    } else
      passes++;

    nfc_rc=raptor_nfc_check(nfc1, nfc1_len, &error);
    if(!nfc_rc) {
      fprintf(stderr, "%s:%u: NFC check 1 failed on: '", filename, line);
      utf8_print(nfc1, nfc1_len, stderr);
      fprintf(stderr, "' at byte %d of %d\n", error, (int)nfc1_len);
      fails++;
    } else
      passes++;

    /* identical columns need not be checked twice */
    if(nfc1_len == nfc2_len && !memcmp(nfc1, nfc2, nfc1_len))
      continue;

    if(!raptor_utf8_check(nfc2, nfc2_len)) {
      fprintf(stderr, "%s:%u: UTF8 check 2 failed on: '", filename, line);
      utf8_print(nfc2, nfc2_len, stderr);
      fputs("'\n", stderr);
      fails++;
    } else
      passes++;

    nfc_rc=raptor_nfc_check(nfc2, nfc2_len, &error);
    if(!nfc_rc) {
      fprintf(stderr, "%s:%u: NFC check 2 failed on: '", filename, line);
      utf8_print(nfc2, nfc2_len, stderr);
      fprintf(stderr, "' at byte %d of %d\n", error, (int)nfc2_len);
      fails++;
    } else
      passes++;
    
  }

  fclose(fh);

  fprintf(stderr, "%s: max c2 len: %d,  max c4 len: %d\n", program,
          (int)max_c2_len, (int)max_c4_len);
  fprintf(stderr, "%s: passes: %d fails: %d\n", program,
          passes, fails);

  return rc;
}