/* The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event. */
/* -*- Mode: c; c-basic-offset: 2 -*-
 *
 * raptor_nfc.h - Raptor Unicode NFC headers
 *
 * Copyright (C) 2004-2006, David Beckett http://purl.org/net/dajobe/
 * Copyright (C) 2004-2004, University of Bristol, UK http://www.bristol.ac.uk/
 * 
 * This package is Free Software and part of Redland http://librdf.org/
 * 
 * It is licensed under the following three licenses as alternatives:
 *   1. GNU Lesser General Public License (LGPL) V2.1 or any newer version
 *   2. GNU General Public License (GPL) V2 or any newer version
 *   3. Apache License, V2.0 or any newer version
 * 
 * You may not use this file except in compliance with at least one of
 * the above three licenses.
 * 
 * See LICENSE.html or LICENSE.txt at the top of this package for the
 * complete terms and further detail along with the license texts for
 * the licenses in COPYING.LIB, COPYING and LICENSE-2.0.txt respectively.
 * 
 * 
 */


#include <sys/types.h>

/* u32 - unsigned integer type chosen by configured size.
 *
 * Picks the first of unsigned int / unsigned long whose SIZEOF_* macro
 * equals 4, and stops the build with an error if neither does.
 * The SIZEOF_* macros are not defined in this header; they must already
 * be defined when it is included (presumably by a configure-generated
 * header - confirm against the build).
 *
 * Note: in a #if expression any identifier that is not a macro evaluates
 * to 0, so the outer test is also true when neither u32 nor MISSING is
 * defined as a macro.
 */
#if u32 == MISSING
#  undef u32
#  if SIZEOF_UNSIGNED_INT == 4
typedef unsigned int u32;
#  else
#    if SIZEOF_UNSIGNED_LONG == 4
typedef unsigned long u32;
#    else
#      error u32 type not defined
#    endif
#  endif
#endif


/* u16 - unsigned integer type chosen by configured size.
 *
 * Picks the first of unsigned short / unsigned int whose SIZEOF_* macro
 * equals 2, and stops the build with an error if neither does.
 * Each branch must typedef the same type whose size it has just tested;
 * otherwise u16 ends up wider than 2 bytes and the size of
 * structures built from it changes.
 *
 * The SIZEOF_* macros are not defined in this header; they must already
 * be defined when it is included (presumably by a configure-generated
 * header - confirm against the build).
 *
 * Note: in a #if expression any identifier that is not a macro evaluates
 * to 0, so the outer test is also true when neither u16 nor MISSING is
 * defined as a macro.
 */
#if u16 == MISSING
  #undef u16
  #if SIZEOF_UNSIGNED_SHORT == 2
    typedef unsigned short u16;
  #elif SIZEOF_UNSIGNED_INT == 2
    typedef unsigned int u16;
  #else
    #error u16 type not defined
  #endif
#endif


/* u8 - unsigned integer type chosen by configured size.
 *
 * Uses unsigned char when SIZEOF_UNSIGNED_CHAR is 1; any other value
 * (including the macro being undefined, which evaluates as 0) stops the
 * build with an error.
 *
 * Note: in a #if expression any identifier that is not a macro evaluates
 * to 0, so the outer test is also true when neither u8 nor MISSING is
 * defined as a macro.
 */
#if u8 == MISSING
#  undef u8
#  if SIZEOF_UNSIGNED_CHAR != 1
#    error u8 type not defined
#  else
typedef unsigned char u8;
#  endif
#endif

/*
 * Definitions for Unicode NFC data tables
 *
 * See Unicode Normalization http://unicode.org/unicode/reports/tr15/
 * for the definition of Unicode Normal Form C (NFC)
 */


/* Unicode combining classes
 *
 * A code's combining class is taken from the 4th field of
 * UnicodeData.txt.  Most codes are class 0 - nothing special - so this
 * structure is used to build a sparse sequence of (key, class) pairs,
 * ordered by key, holding only the non-0 class entries.
 *
 * The two members are bit-fields of 24 and 8 bits.
 */
typedef struct {
  /* the code; 24 bits holds 0..0xFFFFFF, enough for any Unicode code
   * (maximum U+10FFFF).
   * NOTE(review): this range was previously documented as
   * "0.. 0x10FFD inclusive" - probably a typo for 0x10FFFD; confirm. */
  unsigned key : 24;
  /* the combining class (0..255, so 8 bits is enough; only ~50-60
   * distinct values are used) */
  unsigned combining_class : 8;
} raptor_nfc_key_class;


/* Unicode combining characters
 *
 * A (base, follow) pair of characters that is a valid combination when
 * the two appear in that order.  Both values lie in 0..0xFFFF inclusive.
 *
 * This structure is used to build a sparse sequence of such pairs; a
 * given 'base' may appear several times in the sequence, once for each
 * valid 'follow'.
 */
typedef struct {
  /* first character of the pair */
  u16 base;
  /* character that may follow 'base' */
  u16 follow;
} raptor_nfc_base_follow;


/*
 * Flags for codes U+0 to U+108FF, U+1D000 to U+1D7FF
 *
 * Each code in those ranges is classified as exactly one of the values
 * below.  The numeric values are written out explicitly; they are the
 * same ones the compiler would assign implicitly (0, 1, 2, ...).
 */

typedef enum {
  HIGH = 0,   /* High Surrogates: U+D800 to U+DBFF */
  loww = 1,   /* Low Surrogates: U+DC00 to U+DFFF */
  NoNo = 2,   /* code that does not exist */
  NOFC = 3,   /* forbidden or excluded in NFC */
  ReCo = 4,   /* class > 0 recombining */
  NoRe = 5,   /* class > 0 not recombining */
  COM0 = 6,   /* class 0 and composing */
  Hang = 7,   /* Hangul Jamo (Korean) initial consonants: U+1100 to U+1112 */
  hAng = 8,   /* Hangul Jamo (Korean) medial vowels: U+1161 to U+1175 */
  haNG = 9,   /* Hangul Jamo (Korean) trailing consonants: U+11A8 to U+11C2 */
  HAng = 10,  /* Hangul syllables: U+AC00 to U+D7A3 (except for every 28) */
  Base = 11,  /* base that combines */
  simp = 12   /* class 0 nothing special */
} raptor_nfc_code_flag;


/* Sizes (in array elements) of the NFC data tables declared below. */
#define RAPTOR_NFC_CLASSES_COUNT 352
#define RAPTOR_NFC_RECOMBINERS_COUNT 2177
#define RAPTOR_NFC_CODE_FLAGS_COUNT 34944

/* The tables are only declared here; their definitions live in another
 * translation unit. */

/* (key, combining class) entries */
extern raptor_nfc_key_class raptor_nfc_classes[RAPTOR_NFC_CLASSES_COUNT];

/* (base, follow) entries */
extern raptor_nfc_base_follow raptor_nfc_recombiners[RAPTOR_NFC_RECOMBINERS_COUNT];

/* per-code flag data, stored as bytes.
 * NOTE(review): 34944 is exactly half of the 69888 codes in the ranges
 * U+0..U+108FF plus U+1D000..U+1D7FF, which suggests two flags packed
 * into each byte - confirm against the table definition and its lookup
 * code. */
extern u8 raptor_nfc_flags[RAPTOR_NFC_CODE_FLAGS_COUNT];