Main Page   Class Hierarchy   Alphabetical List   Compound List   File List   Compound Members   File Members   Search  

uchar.h

Go to the documentation of this file.
00001 /*
00002 **********************************************************************
00003 *   Copyright (C) 1997-2001, International Business Machines
00004 *   Corporation and others.  All Rights Reserved.
00005 **********************************************************************
00006 *
00007 * File UCHAR.H
00008 *
00009 * Modification History:
00010 *
00011 *   Date        Name        Description
00012 *   04/02/97    aliu        Creation.
00013 *   03/29/99    helena      Updated for C APIs.
00014 *   4/15/99     Madhu       Updated for C Implementation and Javadoc
00015 *   5/20/99     Madhu       Added the function u_getVersion()
00016 *   8/19/1999   srl         Upgraded scripts to Unicode 3.0
00017 *   8/27/1999   schererm    UCharDirection constants: U_...
00018 *   11/11/1999  weiv        added u_isalnum(), cleaned comments
00019 *   01/11/2000  helena      Renamed u_getVersion to u_getUnicodeVersion().
00020 ******************************************************************************
00021 */
00022 
00023 #ifndef UCHAR_H
00024 #define UCHAR_H
00025 
00026 #include "unicode/utypes.h"
00027 
00028 U_CDECL_BEGIN
00029 
00030 /*==========================================================================*/
00031 /* Unicode version number                                                   */
00032 /*==========================================================================*/
/* Version string literal "3.2". By name, the Unicode version these
   definitions correspond to (the block list further down has a
   "New blocks in Unicode 3.2" section). */
00042 #define U_UNICODE_VERSION "3.2"
00043 
/* Constant 0. By name, the smallest code point value. */
00065 #define UCHAR_MIN_VALUE 0
00066 
/* Constant 0x10ffff (decimal 1114111). By name, the largest code point value. */
00073 #define UCHAR_MAX_VALUE 0x10ffff
00074 
/* Single-bit mask with bit number x set, computed in uint32_t.
   The argument is parenthesized and appears once in the expansion.
   x must be in 0..31: shifting a 32-bit operand by 32 or more (or by a
   negative count) is undefined behavior in C.
   Used in this header by the U_GC_*_MASK macros and by U_GET_GC_MASK(). */
00079 #define U_MASK(x) ((uint32_t)1<<(x))
00080 
/* Selector constants for character properties, grouped into numeric ranges
   named after the kind of value (group names taken from the constants):
     binary  0      .. UCHAR_BINARY_LIMIT-1
     int     0x1000 .. UCHAR_INT_LIMIT-1
     mask    0x2000 .. UCHAR_MASK_LIMIT-1
     double  0x3000 .. UCHAR_DOUBLE_LIMIT-1
     string  0x4000 .. UCHAR_STRING_LIMIT-1
   Each *_START aliases the first constant of its range and each *_LIMIT is
   one past the last, so a range can be walked as START <= p < LIMIT.
   Only the first constant of each range has an explicit value; the rest rely
   on implicit increment, so inserting a constant in the middle of a range
   renumbers every constant after it. */
00113 typedef enum UProperty {
00125     UCHAR_ALPHABETIC=0,
00127     UCHAR_BINARY_START=UCHAR_ALPHABETIC, /* alias for the first binary constant (0) */
00129     UCHAR_ASCII_HEX_DIGIT,
00133     UCHAR_BIDI_CONTROL,
00138     UCHAR_BIDI_MIRRORED,
00140     UCHAR_DASH,
00144     UCHAR_DEFAULT_IGNORABLE_CODE_POINT,
00147     UCHAR_DEPRECATED,
00150     UCHAR_DIACRITIC,
00154     UCHAR_EXTENDER,
00158     UCHAR_FULL_COMPOSITION_EXCLUSION,
00162     UCHAR_GRAPHEME_BASE,
00166     UCHAR_GRAPHEME_EXTEND,
00169     UCHAR_GRAPHEME_LINK,
00172     UCHAR_HEX_DIGIT,
00175     UCHAR_HYPHEN,
00179     UCHAR_ID_CONTINUE,
00183     UCHAR_ID_START,
00186     UCHAR_IDEOGRAPHIC,
00190     UCHAR_IDS_BINARY_OPERATOR,
00194     UCHAR_IDS_TRINARY_OPERATOR,
00197     UCHAR_JOIN_CONTROL,
00201     UCHAR_LOGICAL_ORDER_EXCEPTION,
00204     UCHAR_LOWERCASE,
00206     UCHAR_MATH,
00210     UCHAR_NONCHARACTER_CODE_POINT,
00212     UCHAR_QUOTATION_MARK,
00216     UCHAR_RADICAL,
00221     UCHAR_SOFT_DOTTED,
00225     UCHAR_TERMINAL_PUNCTUATION,
00229     UCHAR_UNIFIED_IDEOGRAPH,
00232     UCHAR_UPPERCASE,
00236     UCHAR_WHITE_SPACE,
00240     UCHAR_XID_CONTINUE,
00243     UCHAR_XID_START,
00245     UCHAR_BINARY_LIMIT, /* one past UCHAR_XID_START (34 with the list above) */
00246 
00249     UCHAR_BIDI_CLASS=0x1000,
00251     UCHAR_INT_START=UCHAR_BIDI_CLASS, /* alias for the first int constant */
00254     UCHAR_BLOCK,
00257     UCHAR_CANONICAL_COMBINING_CLASS,
00260     UCHAR_DECOMPOSITION_TYPE,
00264     UCHAR_EAST_ASIAN_WIDTH,
00267     UCHAR_GENERAL_CATEGORY,
00270     UCHAR_JOINING_GROUP,
00273     UCHAR_JOINING_TYPE,
00276     UCHAR_LINE_BREAK,
00279     UCHAR_NUMERIC_TYPE,
00282     UCHAR_SCRIPT,
00284     UCHAR_INT_LIMIT, /* one past UCHAR_SCRIPT */
00285 
00293     UCHAR_GENERAL_CATEGORY_MASK=0x2000,
00295     UCHAR_MASK_START=UCHAR_GENERAL_CATEGORY_MASK, /* the mask range has a single member */
00297     UCHAR_MASK_LIMIT,
00298 
00301     UCHAR_NUMERIC_VALUE=0x3000,
00303     UCHAR_DOUBLE_START=UCHAR_NUMERIC_VALUE, /* the double range has a single member */
00305     UCHAR_DOUBLE_LIMIT,
00306 
00309     UCHAR_AGE=0x4000,
00311     UCHAR_STRING_START=UCHAR_AGE, /* alias for the first string constant */
00314     UCHAR_BIDI_MIRRORING_GLYPH,
00317     UCHAR_CASE_FOLDING,
00320     UCHAR_ISO_COMMENT,
00323     UCHAR_LOWERCASE_MAPPING,
00326     UCHAR_NAME,
00329     UCHAR_SIMPLE_CASE_FOLDING,
00332     UCHAR_SIMPLE_LOWERCASE_MAPPING,
00335     UCHAR_SIMPLE_TITLECASE_MAPPING,
00338     UCHAR_SIMPLE_UPPERCASE_MAPPING,
00341     UCHAR_TITLECASE_MAPPING,
00344     UCHAR_UNICODE_1_NAME,
00347     UCHAR_UPPERCASE_MAPPING,
00349     UCHAR_STRING_LIMIT, /* one past UCHAR_UPPERCASE_MAPPING */
00350 
00352     UCHAR_INVALID_CODE = -1 /* lies outside every range above; also makes the enum's value set signed */
00353 } UProperty;
00354 
/* General category values, explicitly numbered 0..29.
   U_UNASSIGNED and U_GENERAL_OTHER_TYPES are two names for the same value 0.
   U_CHAR_CATEGORY_COUNT follows U_FINAL_PUNCTUATION (29) and is therefore 30.
   These values are the bit numbers used by the U_GC_*_MASK macros in this
   header, so every value must stay below 32 for U_MASK() to be defined. */
00360 typedef enum UCharCategory
00361 {
00365     U_UNASSIGNED              = 0,
00367     U_GENERAL_OTHER_TYPES     = 0, /* same value as U_UNASSIGNED */
00369     U_UPPERCASE_LETTER        = 1,
00371     U_LOWERCASE_LETTER        = 2,
00373     U_TITLECASE_LETTER        = 3,
00375     U_MODIFIER_LETTER         = 4,
00377     U_OTHER_LETTER            = 5,
00379     U_NON_SPACING_MARK        = 6,
00381     U_ENCLOSING_MARK          = 7,
00383     U_COMBINING_SPACING_MARK  = 8,
00385     U_DECIMAL_DIGIT_NUMBER    = 9,
00387     U_LETTER_NUMBER           = 10,
00389     U_OTHER_NUMBER            = 11,
00391     U_SPACE_SEPARATOR         = 12,
00393     U_LINE_SEPARATOR          = 13,
00395     U_PARAGRAPH_SEPARATOR     = 14,
00397     U_CONTROL_CHAR            = 15,
00399     U_FORMAT_CHAR             = 16,
00401     U_PRIVATE_USE_CHAR        = 17,
00403     U_SURROGATE               = 18,
00405     U_DASH_PUNCTUATION        = 19,
00407     U_START_PUNCTUATION       = 20,
00409     U_END_PUNCTUATION         = 21,
00411     U_CONNECTOR_PUNCTUATION   = 22,
00413     U_OTHER_PUNCTUATION       = 23,
00415     U_MATH_SYMBOL             = 24,
00417     U_CURRENCY_SYMBOL         = 25,
00419     U_MODIFIER_SYMBOL         = 26,
00421     U_OTHER_SYMBOL            = 27,
00423     U_INITIAL_PUNCTUATION     = 28, /* numbered after the symbol values, not with the other punctuation values */
00425     U_FINAL_PUNCTUATION       = 29,
00427     U_CHAR_CATEGORY_COUNT     /* 30: number of distinct category values */
00428 } UCharCategory;
00429 
/* Per-category bit masks. Each U_GC_xx_MASK is U_MASK() applied to one
   UCharCategory constant, i.e. a uint32_t with exactly the bit numbered by
   that category set. The two letters in each macro name abbreviate the
   category constant it wraps. */

/* Bit 0: value 0 is shared by U_GENERAL_OTHER_TYPES and U_UNASSIGNED. */
00444 #define U_GC_CN_MASK    U_MASK(U_GENERAL_OTHER_TYPES)
00445 
/* Letter categories (bits 1..5). */
00447 #define U_GC_LU_MASK    U_MASK(U_UPPERCASE_LETTER)
00448 
00449 #define U_GC_LL_MASK    U_MASK(U_LOWERCASE_LETTER)
00450 
00451 #define U_GC_LT_MASK    U_MASK(U_TITLECASE_LETTER)
00452 
00453 #define U_GC_LM_MASK    U_MASK(U_MODIFIER_LETTER)
00454 
00455 #define U_GC_LO_MASK    U_MASK(U_OTHER_LETTER)
00456 
/* Mark categories (bits 6..8). */
00458 #define U_GC_MN_MASK    U_MASK(U_NON_SPACING_MARK)
00459 
00460 #define U_GC_ME_MASK    U_MASK(U_ENCLOSING_MARK)
00461 
00462 #define U_GC_MC_MASK    U_MASK(U_COMBINING_SPACING_MARK)
00463 
/* Number categories (bits 9..11). */
00465 #define U_GC_ND_MASK    U_MASK(U_DECIMAL_DIGIT_NUMBER)
00466 
00467 #define U_GC_NL_MASK    U_MASK(U_LETTER_NUMBER)
00468 
00469 #define U_GC_NO_MASK    U_MASK(U_OTHER_NUMBER)
00470 
/* Separator categories (bits 12..14). */
00472 #define U_GC_ZS_MASK    U_MASK(U_SPACE_SEPARATOR)
00473 
00474 #define U_GC_ZL_MASK    U_MASK(U_LINE_SEPARATOR)
00475 
00476 #define U_GC_ZP_MASK    U_MASK(U_PARAGRAPH_SEPARATOR)
00477 
/* Control, format, private-use and surrogate categories (bits 15..18). */
00479 #define U_GC_CC_MASK    U_MASK(U_CONTROL_CHAR)
00480 
00481 #define U_GC_CF_MASK    U_MASK(U_FORMAT_CHAR)
00482 
00483 #define U_GC_CO_MASK    U_MASK(U_PRIVATE_USE_CHAR)
00484 
00485 #define U_GC_CS_MASK    U_MASK(U_SURROGATE)
00486 
/* Punctuation categories (bits 19..23); initial/final punctuation follow below. */
00488 #define U_GC_PD_MASK    U_MASK(U_DASH_PUNCTUATION)
00489 
00490 #define U_GC_PS_MASK    U_MASK(U_START_PUNCTUATION)
00491 
00492 #define U_GC_PE_MASK    U_MASK(U_END_PUNCTUATION)
00493 
00494 #define U_GC_PC_MASK    U_MASK(U_CONNECTOR_PUNCTUATION)
00495 
00496 #define U_GC_PO_MASK    U_MASK(U_OTHER_PUNCTUATION)
00497 
/* Symbol categories (bits 24..27). */
00499 #define U_GC_SM_MASK    U_MASK(U_MATH_SYMBOL)
00500 
00501 #define U_GC_SC_MASK    U_MASK(U_CURRENCY_SYMBOL)
00502 
00503 #define U_GC_SK_MASK    U_MASK(U_MODIFIER_SYMBOL)
00504 
00505 #define U_GC_SO_MASK    U_MASK(U_OTHER_SYMBOL)
00506 
/* Initial and final punctuation (bits 28..29). */
00508 #define U_GC_PI_MASK    U_MASK(U_INITIAL_PUNCTUATION)
00509 
00510 #define U_GC_PF_MASK    U_MASK(U_FINAL_PUNCTUATION)
00511 
00512 
/* Combined masks: bitwise OR of the per-category masks above. Each expansion
   is fully parenthesized, so it is safe inside larger expressions. */

/* All five letter categories. */
00514 #define U_GC_L_MASK \
00515             (U_GC_LU_MASK|U_GC_LL_MASK|U_GC_LT_MASK|U_GC_LM_MASK|U_GC_LO_MASK)
00516 
/* Upper-, lower- and titlecase letters only (a subset of U_GC_L_MASK). */
00518 #define U_GC_LC_MASK \
00519             (U_GC_LU_MASK|U_GC_LL_MASK|U_GC_LT_MASK)
00520 
/* All three mark categories. */
00522 #define U_GC_M_MASK (U_GC_MN_MASK|U_GC_ME_MASK|U_GC_MC_MASK)
00523 
/* All three number categories. */
00525 #define U_GC_N_MASK (U_GC_ND_MASK|U_GC_NL_MASK|U_GC_NO_MASK)
00526 
/* All three separator categories. */
00528 #define U_GC_Z_MASK (U_GC_ZS_MASK|U_GC_ZL_MASK|U_GC_ZP_MASK)
00529 
/* Value-0 category plus control, format, private use and surrogate. */
00531 #define U_GC_C_MASK \
00532             (U_GC_CN_MASK|U_GC_CC_MASK|U_GC_CF_MASK|U_GC_CO_MASK|U_GC_CS_MASK)
00533 
/* All seven punctuation categories, including initial and final. */
00535 #define U_GC_P_MASK \
00536             (U_GC_PD_MASK|U_GC_PS_MASK|U_GC_PE_MASK|U_GC_PC_MASK|U_GC_PO_MASK| \
00537              U_GC_PI_MASK|U_GC_PF_MASK)
00538 
/* All four symbol categories. */
00540 #define U_GC_S_MASK (U_GC_SM_MASK|U_GC_SC_MASK|U_GC_SK_MASK|U_GC_SO_MASK)
00541 
/* Directional class values, explicitly numbered 0..18; this is the return
   type of u_charDirection(). U_CHAR_DIRECTION_COUNT follows
   U_BOUNDARY_NEUTRAL (18) and is therefore 19. */
00546 typedef enum UCharDirection { 
00550     U_LEFT_TO_RIGHT               = 0, 
00552     U_RIGHT_TO_LEFT               = 1, 
00554     U_EUROPEAN_NUMBER             = 2,
00556     U_EUROPEAN_NUMBER_SEPARATOR   = 3,
00558     U_EUROPEAN_NUMBER_TERMINATOR  = 4,
00560     U_ARABIC_NUMBER               = 5,
00562     U_COMMON_NUMBER_SEPARATOR     = 6,
00564     U_BLOCK_SEPARATOR             = 7,
00566     U_SEGMENT_SEPARATOR           = 8,
00568     U_WHITE_SPACE_NEUTRAL         = 9, 
00570     U_OTHER_NEUTRAL               = 10, 
00572     U_LEFT_TO_RIGHT_EMBEDDING     = 11,
00574     U_LEFT_TO_RIGHT_OVERRIDE      = 12,
00576     U_RIGHT_TO_LEFT_ARABIC        = 13,
00578     U_RIGHT_TO_LEFT_EMBEDDING     = 14,
00580     U_RIGHT_TO_LEFT_OVERRIDE      = 15,
00582     U_POP_DIRECTIONAL_FORMAT      = 16,
00584     U_DIR_NON_SPACING_MARK        = 17, /* DIR_ prefix keeps it distinct from UCharCategory's U_NON_SPACING_MARK */
00586     U_BOUNDARY_NEUTRAL            = 18,
00588     U_CHAR_DIRECTION_COUNT        /* 19: number of directional class values */
00589 } UCharDirection;
00590 
/* Block identifiers; the return type of ublock_getCode().
   - Values are explicit and start at 1; no enumerator has the value 0.
   - The bracketed hex number in each trailing comment is taken here to be
     the first code point of the block -- presumably; confirm against the
     Unicode Blocks data.
   - Numbering is historical, not code point order: the blocks added for
     Unicode 3.2 are appended as 97..110, and UBLOCK_SPECIALS (86) is
     numbered before UBLOCK_HALFWIDTH_AND_FULLWIDTH_FORMS (87).
   - UBLOCK_COUNT is one past the highest block value (111).
   - With ICU_UCHAR_USE_DEPRECATES defined, the old U_* names are added as
     aliases carrying the same numeric values.
   The "See note !!" marker refers to a note that is not present in this
   listing. */
00596 enum UBlockCode {
00598     UBLOCK_BASIC_LATIN = 1, /*[0000]*/ /*See note !!*/
00599 
00601     UBLOCK_LATIN_1_SUPPLEMENT=2, /*[0080]*/
00602 
00604     UBLOCK_LATIN_EXTENDED_A =3, /*[0100]*/
00605 
00607     UBLOCK_LATIN_EXTENDED_B =4, /*[0180]*/
00608 
00610     UBLOCK_IPA_EXTENSIONS =5, /*[0250]*/
00611     
00613     UBLOCK_SPACING_MODIFIER_LETTERS =6, /*[02B0]*/
00614 
00616     UBLOCK_COMBINING_DIACRITICAL_MARKS =7, /*[0300]*/
00617     
00622     UBLOCK_GREEK =8, /*[0370]*/
00623 
00625     UBLOCK_CYRILLIC =9, /*[0400]*/
00626 
00628     UBLOCK_ARMENIAN =10, /*[0530]*/
00629 
00631     UBLOCK_HEBREW =11, /*[0590]*/
00632 
00634     UBLOCK_ARABIC =12, /*[0600]*/
00635 
00637     UBLOCK_SYRIAC =13, /*[0700]*/
00638 
00640     UBLOCK_THAANA =14, /*[0780]*/
00641 
00643     UBLOCK_DEVANAGARI =15, /*[0900]*/
00644 
00646     UBLOCK_BENGALI =16, /*[0980]*/
00647 
00649     UBLOCK_GURMUKHI =17, /*[0A00]*/
00650 
00652     UBLOCK_GUJARATI =18, /*[0A80]*/
00653 
00655     UBLOCK_ORIYA =19, /*[0B00]*/
00656 
00658     UBLOCK_TAMIL =20, /*[0B80]*/
00659 
00661     UBLOCK_TELUGU =21, /*[0C00]*/
00662 
00664     UBLOCK_KANNADA =22, /*[0C80]*/
00665 
00667     UBLOCK_MALAYALAM =23, /*[0D00]*/
00668 
00670     UBLOCK_SINHALA =24, /*[0D80]*/
00671 
00673     UBLOCK_THAI =25, /*[0E00]*/
00674 
00676     UBLOCK_LAO =26, /*[0E80]*/
00677 
00679     UBLOCK_TIBETAN =27, /*[0F00]*/
00680 
00682     UBLOCK_MYANMAR =28, /*[1000]*/
00683 
00685     UBLOCK_GEORGIAN =29, /*[10A0]*/
00686 
00688     UBLOCK_HANGUL_JAMO =30, /*[1100]*/
00689 
00691     UBLOCK_ETHIOPIC =31, /*[1200]*/
00692 
00694     UBLOCK_CHEROKEE =32, /*[13A0]*/
00695 
00697     UBLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS =33, /*[1400]*/
00698 
00700     UBLOCK_OGHAM =34, /*[1680]*/
00701 
00703     UBLOCK_RUNIC =35, /*[16A0]*/
00704 
00706     UBLOCK_KHMER =36, /*[1780]*/
00707 
00709     UBLOCK_MONGOLIAN =37, /*[1800]*/
00710 
00712     UBLOCK_LATIN_EXTENDED_ADDITIONAL =38, /*[1E00]*/
00713 
00715     UBLOCK_GREEK_EXTENDED =39, /*[1F00]*/
00716 
00718     UBLOCK_GENERAL_PUNCTUATION =40, /*[2000]*/
00719 
00721     UBLOCK_SUPERSCRIPTS_AND_SUBSCRIPTS =41, /*[2070]*/
00722     
00724     UBLOCK_CURRENCY_SYMBOLS =42, /*[20A0]*/
00725     
00730     UBLOCK_COMBINING_MARKS_FOR_SYMBOLS =43, /*[20D0]*/
00731     
00733     UBLOCK_LETTERLIKE_SYMBOLS =44, /*[2100]*/
00734     
00736     UBLOCK_NUMBER_FORMS =45, /*[2150]*/
00737 
00739     UBLOCK_ARROWS =46, /*[2190]*/
00740 
00742     UBLOCK_MATHEMATICAL_OPERATORS =47, /*[2200]*/
00743 
00745     UBLOCK_MISCELLANEOUS_TECHNICAL =48, /*[2300]*/
00746 
00748     UBLOCK_CONTROL_PICTURES =49, /*[2400]*/
00749  
00751     UBLOCK_OPTICAL_CHARACTER_RECOGNITION =50, /*[2440]*/
00752 
00754     UBLOCK_ENCLOSED_ALPHANUMERICS =51, /*[2460]*/
00755 
00757     UBLOCK_BOX_DRAWING =52, /*[2500]*/
00758 
00760     UBLOCK_BLOCK_ELEMENTS =53, /*[2580]*/
00761 
00763     UBLOCK_GEOMETRIC_SHAPES =54, /*[25A0]*/
00764 
00766     UBLOCK_MISCELLANEOUS_SYMBOLS =55, /*[2600]*/
00767 
00769     UBLOCK_DINGBATS =56, /*[2700]*/
00770 
00772     UBLOCK_BRAILLE_PATTERNS =57, /*[2800]*/
00773 
00775     UBLOCK_CJK_RADICALS_SUPPLEMENT =58, /*[2E80]*/
00776 
00778     UBLOCK_KANGXI_RADICALS =59, /*[2F00]*/
00779 
00781     UBLOCK_IDEOGRAPHIC_DESCRIPTION_CHARACTERS =60, /*[2FF0]*/
00782 
00784     UBLOCK_CJK_SYMBOLS_AND_PUNCTUATION =61, /*[3000]*/
00785 
00787     UBLOCK_HIRAGANA =62, /*[3040]*/
00788 
00790     UBLOCK_KATAKANA =63, /*[30A0]*/
00791 
00793     UBLOCK_BOPOMOFO =64, /*[3100]*/
00794 
00796     UBLOCK_HANGUL_COMPATIBILITY_JAMO =65, /*[3130]*/
00797 
00799     UBLOCK_KANBUN =66, /*[3190]*/
00800 
00802     UBLOCK_BOPOMOFO_EXTENDED =67, /*[31A0]*/
00803 
00805     UBLOCK_ENCLOSED_CJK_LETTERS_AND_MONTHS =68, /*[3200]*/
00806 
00808     UBLOCK_CJK_COMPATIBILITY =69, /*[3300]*/
00809 
00811     UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A =70, /*[3400]*/
00812 
00814     UBLOCK_CJK_UNIFIED_IDEOGRAPHS =71, /*[4E00]*/
00815 
00817     UBLOCK_YI_SYLLABLES =72, /*[A000]*/
00818 
00820     UBLOCK_YI_RADICALS =73, /*[A490]*/
00821 
00823     UBLOCK_HANGUL_SYLLABLES =74, /*[AC00]*/
00824 
00826     UBLOCK_HIGH_SURROGATES =75, /*[D800]*/
00827 
00829     UBLOCK_HIGH_PRIVATE_USE_SURROGATES =76, /*[DB80]*/
00830 
00832     UBLOCK_LOW_SURROGATES =77, /*[DC00]*/
00833 
00843     UBLOCK_PRIVATE_USE = 78,
00853     UBLOCK_PRIVATE_USE_AREA =UBLOCK_PRIVATE_USE, /*[E000]*/ /* second name for value 78 */
00854 
00856     UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS =79, /*[F900]*/
00857 
00859     UBLOCK_ALPHABETIC_PRESENTATION_FORMS =80, /*[FB00]*/
00860 
00862     UBLOCK_ARABIC_PRESENTATION_FORMS_A =81, /*[FB50]*/
00863 
00865     UBLOCK_COMBINING_HALF_MARKS =82, /*[FE20]*/
00866 
00868     UBLOCK_CJK_COMPATIBILITY_FORMS =83, /*[FE30]*/
00869 
00871     UBLOCK_SMALL_FORM_VARIANTS =84, /*[FE50]*/
00872 
00874     UBLOCK_ARABIC_PRESENTATION_FORMS_B =85, /*[FE70]*/
00875 
00877     UBLOCK_SPECIALS =86, /*[FFF0]*/ /* numbered before the block below although its bracketed start is higher */
00878 
00880     UBLOCK_HALFWIDTH_AND_FULLWIDTH_FORMS =87, /*[FF00]*/
00881     
00883     UBLOCK_OLD_ITALIC = 88  , /*[10300]*/
00885     UBLOCK_GOTHIC = 89 , /*[10330]*/
00887     UBLOCK_DESERET = 90 , /*[10400]*/
00889     UBLOCK_BYZANTINE_MUSICAL_SYMBOLS = 91 , /*[1D000]*/
00891     UBLOCK_MUSICAL_SYMBOLS = 92 , /*[1D100]*/
00893     UBLOCK_MATHEMATICAL_ALPHANUMERIC_SYMBOLS = 93  , /*[1D400]*/
00895     UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B  = 94 , /*[20000]*/
00897     UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT = 95 , /*[2F800]*/
00899     UBLOCK_TAGS = 96, /*[E0000]*/
00900 
00901     /* New blocks in Unicode 3.2 */
00902 
00904     UBLOCK_CYRILLIC_SUPPLEMENTARY = 97, /*[0500]*/
00906     UBLOCK_TAGALOG = 98, /*[1700]*/
00908     UBLOCK_HANUNOO = 99, /*[1720]*/
00910     UBLOCK_BUHID = 100, /*[1740]*/
00912     UBLOCK_TAGBANWA = 101, /*[1760]*/
00914     UBLOCK_MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A = 102, /*[27C0]*/
00916     UBLOCK_SUPPLEMENTAL_ARROWS_A = 103, /*[27F0]*/
00918     UBLOCK_SUPPLEMENTAL_ARROWS_B = 104, /*[2900]*/
00920     UBLOCK_MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B = 105, /*[2980]*/
00922     UBLOCK_SUPPLEMENTAL_MATHEMATICAL_OPERATORS = 106, /*[2A00]*/
00924     UBLOCK_KATAKANA_PHONETIC_EXTENSIONS = 107, /*[31F0]*/
00926     UBLOCK_VARIATION_SELECTORS = 108, /*[FE00]*/
00928     UBLOCK_SUPPLEMENTARY_PRIVATE_USE_AREA_A = 109, /*[F0000]*/
00930     UBLOCK_SUPPLEMENTARY_PRIVATE_USE_AREA_B = 110, /*[100000]*/
00931 
00933     UBLOCK_COUNT, /* 111: one past the last block value; must stay directly after the last block */
00934 
00936     UBLOCK_INVALID_CODE=-1 /* no trailing comma: the deprecated section below supplies one when enabled */
00937 
/* Deprecated aliases, compiled only when ICU_UCHAR_USE_DEPRECATES is defined.
   Each U_* name repeats the numeric value of the UBLOCK_* constant with the
   same suffix; the list stops at 87 and does not cover blocks 88..110. */
00938 #ifdef ICU_UCHAR_USE_DEPRECATES
00939     , /* continues the enumerator list after UBLOCK_INVALID_CODE */
00940 
00942     U_BASIC_LATIN = 1,
00944     U_LATIN_1_SUPPLEMENT=2,
00946     U_LATIN_EXTENDED_A=3,
00948     U_LATIN_EXTENDED_B=4,
00950     U_IPA_EXTENSIONS=5,
00952     U_SPACING_MODIFIER_LETTERS=6,
00954     U_COMBINING_DIACRITICAL_MARKS=7,
00956     U_GREEK=8,
00958     U_CYRILLIC=9,
00960     U_ARMENIAN=10,
00962     U_HEBREW=11,
00964     U_ARABIC=12,
00966     U_SYRIAC=13,
00968     U_THAANA=14,
00970     U_DEVANAGARI=15,
00972     U_BENGALI=16,
00974     U_GURMUKHI=17,
00976     U_GUJARATI=18,
00978     U_ORIYA=19,
00980     U_TAMIL=20,
00982     U_TELUGU=21,
00984     U_KANNADA=22,
00986     U_MALAYALAM=23,
00988     U_SINHALA=24,
00990     U_THAI=25,
00992     U_LAO=26,
00994     U_TIBETAN=27,
00996     U_MYANMAR=28,
00998     U_GEORGIAN=29,
01000     U_HANGUL_JAMO=30,
01002     U_ETHIOPIC=31,
01004     U_CHEROKEE=32,
01006     U_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS=33,
01008     U_OGHAM=34,
01010     U_RUNIC=35,
01012     U_KHMER=36,
01014     U_MONGOLIAN=37,
01016     U_LATIN_EXTENDED_ADDITIONAL=38,
01018     U_GREEK_EXTENDED=39,
01020     U_GENERAL_PUNCTUATION=40,
01022     U_SUPERSCRIPTS_AND_SUBSCRIPTS=41,
01024     U_CURRENCY_SYMBOLS=42,
01026     U_COMBINING_MARKS_FOR_SYMBOLS=43,
01028     U_LETTERLIKE_SYMBOLS=44,
01030     U_NUMBER_FORMS=45,
01032     U_ARROWS=46,
01034     U_MATHEMATICAL_OPERATORS=47,
01036     U_MISCELLANEOUS_TECHNICAL=48,
01038     U_CONTROL_PICTURES=49,
01040     U_OPTICAL_CHARACTER_RECOGNITION=50,
01042     U_ENCLOSED_ALPHANUMERICS=51,
01044     U_BOX_DRAWING=52,
01046     U_BLOCK_ELEMENTS=53,
01048     U_GEOMETRIC_SHAPES=54,
01050     U_MISCELLANEOUS_SYMBOLS=55,
01052     U_DINGBATS=56,
01054     U_BRAILLE_PATTERNS=57,
01056     U_CJK_RADICALS_SUPPLEMENT=58,
01058     U_KANGXI_RADICALS=59,
01060     U_IDEOGRAPHIC_DESCRIPTION_CHARACTERS=60,
01062     U_CJK_SYMBOLS_AND_PUNCTUATION=61,
01064     U_HIRAGANA=62,
01066     U_KATAKANA=63,
01068     U_BOPOMOFO=64,
01070     U_HANGUL_COMPATIBILITY_JAMO=65,
01072     U_KANBUN=66,
01074     U_BOPOMOFO_EXTENDED=67,
01076     U_ENCLOSED_CJK_LETTERS_AND_MONTHS=68,
01078     U_CJK_COMPATIBILITY=69,
01080     U_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A=70,
01082     U_CJK_UNIFIED_IDEOGRAPHS=71,
01084     U_YI_SYLLABLES=72,
01086     U_YI_RADICALS=73,
01088     U_HANGUL_SYLLABLES=74,
01090     U_HIGH_SURROGATES=75,
01092     U_HIGH_PRIVATE_USE_SURROGATES=76,
01094     U_LOW_SURROGATES=77,
01096     U_PRIVATE_USE_AREA=78,
01098     U_CJK_COMPATIBILITY_IDEOGRAPHS=79,
01100     U_ALPHABETIC_PRESENTATION_FORMS=80,
01102     U_ARABIC_PRESENTATION_FORMS_A=81,
01104     U_COMBINING_HALF_MARKS=82,
01106     U_CJK_COMPATIBILITY_FORMS=83,
01108     U_SMALL_FORM_VARIANTS=84,
01110     U_ARABIC_PRESENTATION_FORMS_B=85,
01112     U_SPECIALS=86,
01114     U_HALFWIDTH_AND_FULLWIDTH_FORMS=87,
01116     U_SCRIPT_COUNT=UBLOCK_COUNT, /* three legacy names, all equal to UBLOCK_COUNT */
01118     U_CHAR_SCRIPT_COUNT =UBLOCK_COUNT,
01120     U_NO_SCRIPT = UBLOCK_COUNT
01121 #endif /* ICU_UCHAR_USE_DEPRECATES */
01122 };
01123 
/* Lets C code write UBlockCode without the enum keyword. */
01125 typedef enum UBlockCode UBlockCode;
01126 
/* Cell width values, explicitly numbered 0..3; U_CELL_WIDTH_COUNT is 4.
   Presumably the value set of u_charCellWidth(), which is declared in this
   header as returning uint16_t rather than this enum type -- confirm. */
01131 enum UCellWidth
01132 {
01134     U_ZERO_WIDTH              = 0,
01136     U_HALF_WIDTH              = 1,
01138     U_FULL_WIDTH              = 2,
01140     U_NEUTRAL_WIDTH           = 3,
01142     U_CELL_WIDTH_COUNT        /* 4: number of cell width values */
01143 };
01144 
/* Lets C code write UCellWidth without the enum keyword. */
01145 typedef enum UCellWidth UCellWidth; 
/* East Asian Width values. The enumerators have no explicit values, so they
   number 0..5 in declaration order and U_EA_COUNT is 6. The bracketed text in
   each trailing comment is the short alias of that value. Order matters: per
   the implementation note below, an external name list is indexed in step
   with these values. */
01154 typedef enum UEastAsianWidth {
01155     U_EA_NEUTRAL,   /*[N]*/ /*See note !!*/
01156     U_EA_AMBIGUOUS, /*[A]*/
01157     U_EA_HALFWIDTH, /*[H]*/
01158     U_EA_FULLWIDTH, /*[F]*/
01159     U_EA_NARROW,    /*[Na]*/
01160     U_EA_WIDE,      /*[W]*/
01161     U_EA_COUNT      /* 6: number of width values */
01162 } UEastAsianWidth;
01163 /*
01164  * Implementation note:
01165  * Keep UEastAsianWidth constant values in sync with names list in genprops/props2.c.
01166  */
01167 
/* Selects which kind of character name the name functions in this header
   (u_charName, u_charFromName, u_enumCharNames and the UEnumCharNamesFn
   callback) work with. Values are implicit: 0, 1, 2; the COUNT member is 3.
   The precise meaning of each choice is defined by the implementation, which
   is not part of this listing. */
01179 typedef enum UCharNameChoice {
01180     U_UNICODE_CHAR_NAME,
01181     U_UNICODE_10_CHAR_NAME,
01182     U_EXTENDED_CHAR_NAME,
01183     U_CHAR_NAME_CHOICE_COUNT /* 3: number of name choices */
01184 } UCharNameChoice;
01185 
/* Selects the short (0) or long (1) form of a property or property-value
   name; passed to u_getPropertyName() and u_getPropertyValueName().
   The COUNT member is 2. */
01199 typedef enum UPropertyNameChoice {
01200     U_SHORT_PROPERTY_NAME,
01201     U_LONG_PROPERTY_NAME,
01202     U_PROPERTY_NAME_CHOICE_COUNT /* 2: number of name choices */
01203 } UPropertyNameChoice;
01204 
/* Decomposition type values. Enumerators are implicitly numbered 0..17 in
   declaration order; U_DT_COUNT is 18, as its trailing comment says. The
   bracketed text in each trailing comment is the short alias of that value.
   Appending before U_DT_COUNT is safe; inserting or reordering renumbers the
   values that follow. */
01211 typedef enum UDecompositionType {
01212     U_DT_NONE,              /*[none]*/ /*See note !!*/
01213     U_DT_CANONICAL,         /*[can]*/
01214     U_DT_COMPAT,            /*[com]*/
01215     U_DT_CIRCLE,            /*[enc]*/
01216     U_DT_FINAL,             /*[fin]*/
01217     U_DT_FONT,              /*[font]*/
01218     U_DT_FRACTION,          /*[fra]*/
01219     U_DT_INITIAL,           /*[init]*/
01220     U_DT_ISOLATED,          /*[iso]*/
01221     U_DT_MEDIAL,            /*[med]*/
01222     U_DT_NARROW,            /*[nar]*/
01223     U_DT_NOBREAK,           /*[nb]*/
01224     U_DT_SMALL,             /*[sml]*/
01225     U_DT_SQUARE,            /*[sqr]*/
01226     U_DT_SUB,               /*[sub]*/
01227     U_DT_SUPER,             /*[sup]*/
01228     U_DT_VERTICAL,          /*[vert]*/
01229     U_DT_WIDE,              /*[wide]*/
01230     U_DT_COUNT /* 18 */
01231 } UDecompositionType;
01232 
/* Joining type values, implicitly numbered 0..5 in declaration order;
   U_JT_COUNT is 6, as its trailing comment says. The bracketed letter in each
   trailing comment is the short alias of that value. */
01239 typedef enum UJoiningType {
01240     U_JT_NON_JOINING,       /*[U]*/ /*See note !!*/
01241     U_JT_JOIN_CAUSING,      /*[C]*/
01242     U_JT_DUAL_JOINING,      /*[D]*/
01243     U_JT_LEFT_JOINING,      /*[L]*/
01244     U_JT_RIGHT_JOINING,     /*[R]*/
01245     U_JT_TRANSPARENT,       /*[T]*/
01246     U_JT_COUNT /* 6 */
01247 } UJoiningType;
01248 
/* Joining group values. U_JG_NO_JOINING_GROUP is 0; the named groups follow
   in alphabetical order of their identifiers and are implicitly numbered
   1..50, so U_JG_COUNT is 51, as its trailing comment says. Because the
   values are implicit, inserting a new group alphabetically would renumber
   every later group. */
01255 typedef enum UJoiningGroup {
01256     U_JG_NO_JOINING_GROUP,
01257     U_JG_AIN,
01258     U_JG_ALAPH,
01259     U_JG_ALEF,
01260     U_JG_BEH,
01261     U_JG_BETH,
01262     U_JG_DAL,
01263     U_JG_DALATH_RISH,
01264     U_JG_E,
01265     U_JG_FEH,
01266     U_JG_FINAL_SEMKATH,
01267     U_JG_GAF,
01268     U_JG_GAMAL,
01269     U_JG_HAH,
01270     U_JG_HAMZA_ON_HEH_GOAL,
01271     U_JG_HE,
01272     U_JG_HEH,
01273     U_JG_HEH_GOAL,
01274     U_JG_HETH,
01275     U_JG_KAF,
01276     U_JG_KAPH,
01277     U_JG_KNOTTED_HEH,
01278     U_JG_LAM,
01279     U_JG_LAMADH,
01280     U_JG_MEEM,
01281     U_JG_MIM,
01282     U_JG_NOON,
01283     U_JG_NUN,
01284     U_JG_PE,
01285     U_JG_QAF,
01286     U_JG_QAPH,
01287     U_JG_REH,
01288     U_JG_REVERSED_PE,
01289     U_JG_SAD,
01290     U_JG_SADHE,
01291     U_JG_SEEN,
01292     U_JG_SEMKATH,
01293     U_JG_SHIN,
01294     U_JG_SWASH_KAF,
01295     U_JG_SYRIAC_WAW,
01296     U_JG_TAH,
01297     U_JG_TAW,
01298     U_JG_TEH_MARBUTA,
01299     U_JG_TETH,
01300     U_JG_WAW,
01301     U_JG_YEH,
01302     U_JG_YEH_BARREE,
01303     U_JG_YEH_WITH_TAIL,
01304     U_JG_YUDH,
01305     U_JG_YUDH_HE,
01306     U_JG_ZAIN,
01307     U_JG_COUNT /* 51 */
01308 } UJoiningGroup;
01309 
/**
 * Line Break property values.
 *
 * Enumerators are implicitly numbered 0..28 in declaration order, so
 * U_LB_COUNT is 29. The bracketed two-letter text in each trailing comment
 * is the short alias of that value.
 *
 * U_LB_INSEPERABLE is a historical misspelling that existing callers rely
 * on. It is kept unchanged, and the correctly spelled U_LB_INSEPARABLE is
 * provided as an alias with the same value. Because the alias takes the
 * value of U_LB_INSEPERABLE, the implicit numbering of every following
 * enumerator (and of U_LB_COUNT) is unchanged.
 */
typedef enum ULineBreak {
    U_LB_UNKNOWN,           /*[XX]*/ /*See note !!*/
    U_LB_AMBIGUOUS,         /*[AI]*/
    U_LB_ALPHABETIC,        /*[AL]*/
    U_LB_BREAK_BOTH,        /*[B2]*/
    U_LB_BREAK_AFTER,       /*[BA]*/
    U_LB_BREAK_BEFORE,      /*[BB]*/
    U_LB_MANDATORY_BREAK,   /*[BK]*/
    U_LB_CONTINGENT_BREAK,  /*[CB]*/
    U_LB_CLOSE_PUNCTUATION, /*[CL]*/
    U_LB_COMBINING_MARK,    /*[CM]*/
    U_LB_CARRIAGE_RETURN,   /*[CR]*/
    U_LB_EXCLAMATION,       /*[EX]*/
    U_LB_GLUE,              /*[GL]*/
    U_LB_HYPHEN,            /*[HY]*/
    U_LB_IDEOGRAPHIC,       /*[ID]*/
    U_LB_INSEPERABLE,       /*[IN]*/ /* misspelled legacy name, kept for source compatibility */
    U_LB_INSEPARABLE=U_LB_INSEPERABLE, /* correctly spelled alias, same value (15) */
    U_LB_INFIX_NUMERIC,     /*[IS]*/
    U_LB_LINE_FEED,         /*[LF]*/
    U_LB_NONSTARTER,        /*[NS]*/
    U_LB_NUMERIC,           /*[NU]*/
    U_LB_OPEN_PUNCTUATION,  /*[OP]*/
    U_LB_POSTFIX_NUMERIC,   /*[PO]*/
    U_LB_PREFIX_NUMERIC,    /*[PR]*/
    U_LB_QUOTATION,         /*[QU]*/
    U_LB_COMPLEX_CONTEXT,   /*[SA]*/
    U_LB_SURROGATE,         /*[SG]*/
    U_LB_SPACE,             /*[SP]*/
    U_LB_BREAK_SYMBOLS,     /*[SY]*/
    U_LB_ZWSPACE,           /*[ZW]*/
    U_LB_COUNT /* 29 */
} ULineBreak;
01348 
/* Numeric type values, implicitly numbered 0..3 in declaration order;
   U_NT_COUNT is 4. The bracketed text in each trailing comment is the short
   alias of that value. */
01355 typedef enum UNumericType {
01356     U_NT_NONE,              /*[None]*/ /*See note !!*/
01357     U_NT_DECIMAL,           /*[de]*/
01358     U_NT_DIGIT,             /*[di]*/
01359     U_NT_NUMERIC,           /*[nu]*/
01360     U_NT_COUNT              /* 4: number of numeric type values */
01361 } UNumericType;
01362 
/* Binary-property queries on a single code point. These are declarations
   only; the implementations are not part of this header, so the behavior
   notes below are read off the names and signatures and should be confirmed
   against the implementation. Every function here takes a UChar32 and
   returns UBool. */

/* Takes a code point and a UProperty selector. By name, reports whether c
   has the binary property `which`; presumably `which` is expected to lie in
   UCHAR_BINARY_START .. UCHAR_BINARY_LIMIT-1 -- confirm how other selectors
   are handled. */
01389 U_CAPI UBool U_EXPORT2
01390 u_hasBinaryProperty(UChar32 c, UProperty which);
01391 
/* By name, the Alphabetic property test (cf. UCHAR_ALPHABETIC). */
01404 U_CAPI UBool U_EXPORT2
01405 u_isUAlphabetic(UChar32 c);
01406 
/* By name, the Lowercase property test (cf. UCHAR_LOWERCASE). A separate
   function from u_islower(), which is also declared in this header. */
01419 U_CAPI UBool U_EXPORT2
01420 u_isULowercase(UChar32 c);
01421 
/* By name, the Uppercase property test (cf. UCHAR_UPPERCASE). A separate
   function from u_isupper(), which is also declared in this header. */
01434 U_CAPI UBool U_EXPORT2
01435 u_isUUppercase(UChar32 c);
01436 
/* By name, the White_Space property test (cf. UCHAR_WHITE_SPACE). A separate
   function from u_isspace() and u_isWhitespace(), both also declared in this
   header. */
01450 U_CAPI UBool U_EXPORT2
01451 u_isUWhiteSpace(UChar32 c);
01452 
01453 /*
01454  * ### TODO Document all properties more precisely, how they are based (or not) on UCD files.
01455  * Especially u_isdigit, u_isspace, u_isWhitespace.
01456  */
01457 
/* Integer and numeric property access. Declarations only; behavior notes are
   read off the names and signatures. */

/* Takes a code point and a UProperty selector and returns an int32_t.
   By name, the value of property `which` for c. */
01495 U_CAPI int32_t U_EXPORT2
01496 u_getIntPropertyValue(UChar32 c, UProperty which);
01497 
/* Takes only a property selector. By name, the smallest value
   u_getIntPropertyValue() can return for `which`. */
01516 U_CAPI int32_t U_EXPORT2
01517 u_getIntPropertyMinValue(UProperty which);
01518 
/* Takes only a property selector. By name, the largest value
   u_getIntPropertyValue() can return for `which`. */
01545 U_CAPI int32_t U_EXPORT2
01546 u_getIntPropertyMaxValue(UProperty which);
01547 
/* Returns a double for a code point; by name, its numeric value. */
01564 U_CAPI double U_EXPORT2
01565 u_getNumericValue(UChar32 c);
01566 
/* The double constant -123456789.0. Presumably the sentinel that
   u_getNumericValue() returns when c has no numeric value -- confirm in the
   implementation. The value is exactly representable as a double, so an
   equality comparison against this macro is well defined. */
01574 #define U_NO_NUMERIC_VALUE ((double)-123456789.)
01575 
/* Character classification predicates. Each takes one code point and returns
   UBool. Declarations only: which characters each predicate accepts is
   decided by the implementation and is not visible in this header (an
   existing TODO earlier in this file asks for more precise documentation of
   u_isdigit, u_isspace and u_isWhitespace in particular). The one-line notes
   below restate the names only. */

/* Lowercase test. Distinct from u_isULowercase(). */
01588 U_CAPI UBool U_EXPORT2
01589 u_islower(UChar32 c);
01590 
/* Uppercase test. Distinct from u_isUUppercase(). */
01602 U_CAPI UBool U_EXPORT2
01603 u_isupper(UChar32 c);
01604 
/* Titlecase test. */
01616 U_CAPI UBool U_EXPORT2
01617 u_istitle(UChar32 c);
01618 
/* Digit test. */
01626 U_CAPI UBool U_EXPORT2
01627 u_isdigit(UChar32 c);
01628 
/* Alphanumeric test. */
01637 U_CAPI UBool U_EXPORT2
01638 u_isalnum(UChar32 c);
01639 
/* "Is defined" test; presumably true for assigned code points -- confirm. */
01655 U_CAPI UBool U_EXPORT2
01656 u_isdefined(UChar32 c);
01657 
/* Alphabetic test. Distinct from u_isUAlphabetic(). */
01669 U_CAPI UBool U_EXPORT2
01670 u_isalpha(UChar32 c);
01671 
/* Space test. Not the same function as u_isWhitespace() or u_isUWhiteSpace(). */
01679 U_CAPI UBool U_EXPORT2
01680 u_isspace(UChar32 c);
01681 
/* Whitespace test. Not the same function as u_isspace() or u_isUWhiteSpace(). */
01710 U_CAPI UBool U_EXPORT2
01711 u_isWhitespace(UChar32 c);
01712 
/* Control character test. */
01728 U_CAPI UBool U_EXPORT2
01729 u_iscntrl(UChar32 c);
01730 
01731 
/* Printable character test. */
01742 U_CAPI UBool U_EXPORT2
01743 u_isprint(UChar32 c);
01744 
/* Base character test. */
01756 U_CAPI UBool U_EXPORT2
01757 u_isbase(UChar32 c);
01758 
/* Direction, mirroring, width, category, combining class, digit value and
   block lookups. Declarations only; behavior notes are read off the names
   and signatures. */

/* Returns a UCharDirection value for c. */
01771 U_CAPI UCharDirection U_EXPORT2
01772 u_charDirection(UChar32 c);
01773 
/* Returns UBool; by name, whether c is a mirrored character. */
01784 U_CAPI UBool U_EXPORT2
01785 u_isMirrored(UChar32 c);
01786 
/* Returns a code point; by name, the mirror image of c. What is returned
   when c has no mirror is not visible here. */
01803 U_CAPI UChar32 U_EXPORT2
01804 u_charMirror(UChar32 c);
01805 
/* Returns uint16_t, not the UCellWidth enum type; presumably the value is
   one of the UCellWidth constants -- confirm. */
01864 U_CAPI uint16_t U_EXPORT2
01865 u_charCellWidth(UChar32 c);
01866 
/* Returns int8_t, not the UCharCategory enum type; presumably the value is
   one of the UCharCategory constants -- confirm. */
01877 U_CAPI int8_t U_EXPORT2
01878 u_charType(UChar32 c);
01879 
/* Expands to U_MASK(u_charType(c)): a uint32_t with the single bit numbered
   by c's category set, directly comparable with the U_GC_*_MASK macros.
   This is a function call, not a compile-time constant. It assumes
   u_charType() returns a value in 0..31; a negative or larger value would
   make the shift inside U_MASK() undefined. */
01890 #define U_GET_GC_MASK(c) U_MASK(u_charType(c))
01891 
/* Callback type for u_enumCharTypes(). This typedef names a function type,
   not a pointer type, so it is used as `UCharEnumTypeRange *`.
   Receives the caller's context pointer, a range given as start and limit,
   and a UCharCategory. By the parameter name, `limit` is presumably
   exclusive, and the UBool result presumably controls whether enumeration
   continues -- confirm both in the implementation. */
01909 typedef UBool U_CALLCONV
01910 UCharEnumTypeRange(const void *context, UChar32 start, UChar32 limit, UCharCategory type);
01911 
/* Takes a callback and an opaque context pointer, which is handed to the
   callback as its first argument (matching `const void *` types). By name,
   enumerates ranges of code points by category. */
01931 U_CAPI void U_EXPORT2
01932 u_enumCharTypes(UCharEnumTypeRange *enumRange, const void *context);
01933 
/* Returns uint8_t; by name, the combining class of c. */
01941 U_CAPI uint8_t U_EXPORT2
01942 u_getCombiningClass(UChar32 c);
01943 
/* Returns int32_t; by name, the digit value of c. The result for a
   non-digit is not visible here. */
01952 U_CAPI int32_t U_EXPORT2
01953 u_charDigitValue(UChar32 c);
01954 
/* Returns the UBlockCode for ch; by name, the block that contains it. */
01963 U_CAPI UBlockCode U_EXPORT2
01964 ublock_getCode(UChar32    ch);
01965 
/* Character name API. Declarations only; behavior notes are read off the
   names and signatures. Functions that take a UErrorCode pointer presumably
   follow the usual ICU in/out error convention -- confirm. */

/* Writes into a caller-supplied char buffer of bufferLength elements and
   returns an int32_t. By name, retrieves the name of `code` in the flavor
   chosen by nameChoice; the return value is presumably the name length --
   confirm, including the truncation and termination behavior. */
01998 U_CAPI int32_t U_EXPORT2
01999 u_charName(UChar32 code, UCharNameChoice nameChoice,
02000            char *buffer, int32_t bufferLength,
02001            UErrorCode *pErrorCode);
02002 
/* Same buffer-and-length shape as u_charName(): caller-supplied char buffer
   `dest` of destCapacity elements, int32_t result. By name, retrieves the
   ISO comment for c. */
02025 U_CAPI int32_t U_EXPORT2
02026 u_getISOComment(UChar32 c,
02027                 char *dest, int32_t destCapacity,
02028                 UErrorCode *pErrorCode);
02029 
/* Inverse direction of u_charName(): takes a name string (not modified; it
   is const) and a name flavor, and returns a code point. The result for an
   unknown name is not visible here. */
02050 U_CAPI UChar32 U_EXPORT2
02051 u_charFromName(UCharNameChoice nameChoice,
02052                const char *name,
02053                UErrorCode *pErrorCode);
02054 
/* Callback type for u_enumCharNames(). This typedef names a function type,
   not a pointer type, so it is used as `UEnumCharNamesFn *`.
   Receives the caller's context, a code point, the name flavor, and the name
   together with its length. The UBool result presumably controls whether
   enumeration continues -- confirm.
   NOTE(review): unlike UCharEnumTypeRange, this typedef carries no
   U_CALLCONV; confirm whether that difference is intentional before
   relying on a particular calling convention for callbacks. */
02072 typedef UBool UEnumCharNamesFn(void *context,
02073                                UChar32 code,
02074                                UCharNameChoice nameChoice,
02075                                const char *name,
02076                                int32_t length);
02077 
/* Takes a code point range (start, limit), a callback, an opaque context
   pointer that is handed to the callback, and a name flavor. By the
   parameter name, `limit` is presumably exclusive -- confirm. */
02099 U_CAPI void U_EXPORT2
02100 u_enumCharNames(UChar32 start, UChar32 limit,
02101                 UEnumCharNamesFn *fn,
02102                 void *context,
02103                 UCharNameChoice nameChoice,
02104                 UErrorCode *pErrorCode);
02105 
/* Conversions between property / property-value enum constants and their
   names. Declarations only; behavior notes are read off the names and
   signatures. The returned strings are const char pointers with no
   length or buffer parameter, so the caller does not supply storage; their
   lifetime and the result for unknown inputs are not visible here. */

/* Property constant -> name, in the short or long form chosen by nameChoice. */
02137 U_CAPI const char* U_EXPORT2
02138 u_getPropertyName(UProperty property,
02139                   UPropertyNameChoice nameChoice);
02140 
/* Name -> property constant. Presumably returns UCHAR_INVALID_CODE for an
   unrecognized alias -- confirm. */
02160 U_CAPI UProperty U_EXPORT2
02161 u_getPropertyEnum(const char* alias);
02162 
/* (property, value) -> value name, in the short or long form chosen by
   nameChoice. */
02210 U_CAPI const char* U_EXPORT2
02211 u_getPropertyValueName(UProperty property,
02212                        int32_t value,
02213                        UPropertyNameChoice nameChoice);
02214 
/* (property, value name) -> value as int32_t. The result for an
   unrecognized alias is not visible here. */
02246 U_CAPI int32_t U_EXPORT2
02247 u_getPropertyValueEnum(UProperty property,
02248                        const char* alias);
02249 
/* Identifier-related predicates. Each takes one code point and returns UBool.
   Declarations only: the exact character sets are defined by the
   implementation. The notes below restate the names only. */

/* May c start an identifier? */
02266 U_CAPI UBool U_EXPORT2
02267 u_isIDStart(UChar32 c);
02268 
/* May c appear inside an identifier? */
02293 U_CAPI UBool U_EXPORT2
02294 u_isIDPart(UChar32 c);
02295 
/* Is c ignorable within an identifier? */
02320 U_CAPI UBool U_EXPORT2
02321 u_isIDIgnorable(UChar32 c);
02322 
/* Java-flavored counterpart of u_isIDStart(); presumably follows Java's
   identifier rules -- confirm. */
02343 U_CAPI UBool U_EXPORT2
02344 u_isJavaIDStart(UChar32 c);
02345 
/* Java-flavored counterpart of u_isIDPart(); presumably follows Java's
   identifier rules -- confirm. */
02374 U_CAPI UBool U_EXPORT2
02375 u_isJavaIDPart(UChar32 c);
02376 
/* Case mapping, digit conversion, and version/age queries. Declarations only;
   behavior notes are read off the names and signatures. */

/* Single code point in, single code point out; by name, the lowercase
   mapping of c. Mappings that would need more than one code point cannot be
   expressed through this signature. */
02399 U_CAPI UChar32 U_EXPORT2
02400 u_tolower(UChar32 c);
02401 
/* As u_tolower(), for the uppercase mapping. */
02417 U_CAPI UChar32 U_EXPORT2
02418 u_toupper(UChar32 c);
02419 
/* As u_tolower(), for the titlecase mapping. */
02434 U_CAPI UChar32 U_EXPORT2
02435 u_totitle(UChar32 c);
02436 
/* Option value 0; by name, the default behavior for u_foldCase()'s
   `options` argument. */
02438 #define U_FOLD_CASE_DEFAULT 0
02439 
/* Option value 1; by name, excludes special handling of the letter I during
   case folding. The exact mappings affected are not visible here. */
02456 #define U_FOLD_CASE_EXCLUDE_SPECIAL_I 1
02457 
/* Single code point in, single code point out; by name, the case folding of
   c. `options` is a uint32_t, presumably one of the U_FOLD_CASE_* values
   above -- confirm. */
02471 U_CAPI UChar32 U_EXPORT2
02472 u_foldCase(UChar32 c, uint32_t options);
02473 
/* Returns int32_t; by name, the value of ch as a digit in the given radix.
   radix is an int8_t. The accepted radix range and the result for a
   non-digit are not visible here. */
02509 U_CAPI int32_t U_EXPORT2
02510 u_digit(UChar32 ch, int8_t radix);
02511 
/* Inverse direction of u_digit(): digit value and radix in, code point out. */
02538 U_CAPI UChar32 U_EXPORT2
02539 u_forDigit(int32_t digit, int8_t radix);
02540 
/* Returns nothing; the result is delivered through versionArray, which is a
   UVersionInfo (declared outside this header). By name, the Unicode
   version in which c was introduced (cf. UCHAR_AGE). */
02555 U_CAPI void U_EXPORT2
02556 u_charAge(UChar32 c, UVersionInfo versionArray);
02557 
/* Returns nothing; the result is delivered through `info`. By name, the
   Unicode version implemented by the library (cf. U_UNICODE_VERSION; the
   file's modification history records the rename from u_getVersion). */
02565 U_CAPI void U_EXPORT2
02566 u_getUnicodeVersion(UVersionInfo info);
02567 
/* Writes into a caller-supplied UChar buffer `dest` of destCapacity elements
   and returns an int32_t, presumably the output length -- confirm. By name,
   retrieves the FC_NFKC_Closure string for c. */
02589 U_CAPI int32_t U_EXPORT2
02590 u_getFC_NFKC_Closure(UChar32 c, UChar *dest, int32_t destCapacity, UErrorCode *pErrorCode);
02591 
02592 
/* Deprecated script-named aliases for the block API, available only when
   ICU_UCHAR_USE_DEPRECATES is defined before this header is included. */
02593 #ifdef ICU_UCHAR_USE_DEPRECATES
02594 
/* Old name for ublock_getCode(); a plain macro alias, so the two names
   refer to the same function. */
02597 #define u_charScript ublock_getCode
02598 
/* Old name for the UBlockCode type. */
02599 typedef UBlockCode UCharScript;
02600 #endif /* ICU_UCHAR_USE_DEPRECATES */
02601 
02602 U_CDECL_END
02603 
02604 #endif /* UCHAR_H */
02605 /*eof*/

Generated on Wed Dec 18 16:49:52 2002 for ICU 2.4 by doxygen1.2.11.1 written by Dimitri van Heesch, © 1997-2001