The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
// Copyright (c) 2009 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// This file extends lang_enc.h with additional languages and extended routines.
// It is current with Unicode 5.1 (March 2008)
//

#ifndef ENCODINGS_COMPACT_LANG_DET_EXT_LANG_ENC_H__
#define ENCODINGS_COMPACT_LANG_DET_EXT_LANG_ENC_H__

#include "languages/public/languages.h"
#include "encodings/compact_lang_det/letterscript_enum.h"


// Leave a small gap after the base languages, so adding one or two is easy.
// Just reduce the gap here (currently 5 entries)

// Montengrin added, so reducing this from 5 to 4. dsites 2008.10.06
#define EXT_LANGUAGE_BASE (NUM_LANGUAGES + 4)

// Google UI languages
#define X_BORK_BORK_BORK (Language)(EXT_LANGUAGE_BASE+0)
#define X_PIG_LATIN (Language)(EXT_LANGUAGE_BASE+1)
#define X_HACKER (Language)(EXT_LANGUAGE_BASE+2)
#define X_KLINGON (Language)(EXT_LANGUAGE_BASE+3)
#define X_ELMER_FUDD (Language)(EXT_LANGUAGE_BASE+4)

// Pseudo-languages for Unicode scripts that express a single language
#define X_OGHAM (Language)(EXT_LANGUAGE_BASE+5)
#define X_RUNIC (Language)(EXT_LANGUAGE_BASE+6)
#define X_YI (Language)(EXT_LANGUAGE_BASE+7)
#define X_OLD_ITALIC (Language)(EXT_LANGUAGE_BASE+8)
#define X_GOTHIC (Language)(EXT_LANGUAGE_BASE+9)
#define X_DESERET (Language)(EXT_LANGUAGE_BASE+10)
#define X_HANUNOO (Language)(EXT_LANGUAGE_BASE+11)
#define X_BUHID (Language)(EXT_LANGUAGE_BASE+12)
#define X_TAGBANWA (Language)(EXT_LANGUAGE_BASE+13)
#define X_TAI_LE (Language)(EXT_LANGUAGE_BASE+14)
#define X_LINEAR_B (Language)(EXT_LANGUAGE_BASE+15)
#define X_UGARITIC (Language)(EXT_LANGUAGE_BASE+16)
#define X_SHAVIAN (Language)(EXT_LANGUAGE_BASE+17)
#define X_OSMANYA (Language)(EXT_LANGUAGE_BASE+18)
#define X_CYPRIOT (Language)(EXT_LANGUAGE_BASE+19)
#define X_BUGINESE (Language)(EXT_LANGUAGE_BASE+20)
#define X_COPTIC (Language)(EXT_LANGUAGE_BASE+21)
#define X_NEW_TAI_LUE (Language)(EXT_LANGUAGE_BASE+22)
#define X_GLAGOLITIC (Language)(EXT_LANGUAGE_BASE+23)
#define X_TIFINAGH (Language)(EXT_LANGUAGE_BASE+24)
#define X_SYLOTI_NAGRI (Language)(EXT_LANGUAGE_BASE+25)
#define X_OLD_PERSIAN (Language)(EXT_LANGUAGE_BASE+26)
#define X_KHAROSHTHI (Language)(EXT_LANGUAGE_BASE+27)
#define X_BALINESE (Language)(EXT_LANGUAGE_BASE+28)
#define X_CUNEIFORM (Language)(EXT_LANGUAGE_BASE+29)
#define X_PHOENICIAN (Language)(EXT_LANGUAGE_BASE+30)
#define X_PHAGS_PA (Language)(EXT_LANGUAGE_BASE+31)
#define X_NKO (Language)(EXT_LANGUAGE_BASE+32)

// Unicode 5.1
#define X_SUDANESE (Language)(EXT_LANGUAGE_BASE+33)
#define X_LEPCHA (Language)(EXT_LANGUAGE_BASE+34)
#define X_OL_CHIKI (Language)(EXT_LANGUAGE_BASE+35)
#define X_VAI (Language)(EXT_LANGUAGE_BASE+36)
#define X_SAURASHTRA (Language)(EXT_LANGUAGE_BASE+37)
#define X_KAYAH_LI (Language)(EXT_LANGUAGE_BASE+38)
#define X_REJANG (Language)(EXT_LANGUAGE_BASE+39)
#define X_LYCIAN (Language)(EXT_LANGUAGE_BASE+40)
#define X_CARIAN (Language)(EXT_LANGUAGE_BASE+41)
#define X_LYDIAN (Language)(EXT_LANGUAGE_BASE+42)
#define X_CHAM (Language)(EXT_LANGUAGE_BASE+43)

#define EXT_NUM_LANGUAGES (Language)(EXT_LANGUAGE_BASE+44)



// ExtLanguageName
// ------------
// Given the Language, returns its string name used as the output by
// the lang/enc identifier, e.g. "Korean"
// "invalid_language" if the input is invalid.
extern const char* ExtLanguageName(const Language lang);

// ExtLanguageDeclaredName
// ------------
// Given the Language, returns its Language enum spelling, for use by
// programs that create C declarations, e.g. "KOREAN"
// "UNKNOWN_LANGUAGE" if the input is invalid.
extern const char* ExtLanguageDeclaredName(const Language lang);

// ExtLanguageCode
// ------------
// Given the Language, return the language code, e.g. "ko"
// This is determined by
// the following (in order of preference):
// - ISO-639-1 two-letter language code
//   (all except those mentioned below)
// - ISO-639-2 three-letter bibliographic language code
//   (Tibetan, Dhivehi, Cherokee, Syriac)
// - Google-specific language code
//   (ChineseT ("zh-TW"), Teragram Unknown, Unknown,
//   Portuguese-Portugal, Portuguese-Brazil, Limbu)
extern const char * ExtLanguageCode(const Language lang);


// Convert "en-Latn-GB" to ENGLISH
// Normalize to PORTUGUESE, not PORTUGUESE_B nor PORTUGUESE_P
// Consider for later: NORWEGIAN, NORWEGIAN_N
// Consider for later: SCOTS, SCOTS_GAELIC
// Consider for later: SERBO_CROATIAN, SERBIAN, CROATIAN, BOSNIAN
//
Language GetLanguageFromNumberOrName(const char* src);

// Convert "en-Latn-GB" to ULScript_Latin
UnicodeLScript GetLScriptFromNumberOrName(const char* src);

// Merge together some languages, such as bo/hr/sr
Language NormalizeLanguage(Language lang);

#endif  // ENCODINGS_COMPACT_LANG_DET_EXT_LANG_ENC_H__