Main Page   Class Hierarchy   Alphabetical List   Compound List   File List   Compound Members   File Members   Search  

uchar.h

Go to the documentation of this file.
00001 /*
00002 **********************************************************************
00003 *   Copyright (C) 1997-2001, International Business Machines
00004 *   Corporation and others.  All Rights Reserved.
00005 **********************************************************************
00006 *
00007 * File UCHAR.H
00008 *
00009 * Modification History:
00010 *
00011 *   Date        Name        Description
00012 *   04/02/97    aliu        Creation.
00013 *   03/29/99    helena      Updated for C APIs.
00014 *   4/15/99     Madhu       Updated for C Implementation and Javadoc
00015 *   5/20/99     Madhu       Added the function u_getVersion()
00016 *   8/19/1999   srl         Upgraded scripts to Unicode 3.0
00017 *   8/27/1999   schererm    UCharDirection constants: U_...
00018 *   11/11/1999  weiv        added u_isalnum(), cleaned comments
00019 *   01/11/2000  helena      Renamed u_getVersion to u_getUnicodeVersion().
00020 ******************************************************************************
00021 */
00022 
00023 #ifndef UCHAR_H
00024 #define UCHAR_H
00025 
00026 #include "unicode/utypes.h"
00027 /*==========================================================================*/
00028 /* Unicode version number                                                   */
00029 /*==========================================================================*/
/* Unicode version number (see the banner comment just above) as a string literal. */
00030 #define U_UNICODE_VERSION "3.1.1"
00031 
/* Constant 0. NOTE(review): presumably the smallest code point value a UChar32 may hold -- confirm. */
00053 #define UCHAR_MIN_VALUE 0
00054 
/*
 * Constant 0x10ffff (1114111 decimal).
 * NOTE(review): presumably the largest code point value a UChar32 may hold;
 * 0x10FFFF is the last code point of the Unicode code space -- confirm intent.
 */
00061 #define UCHAR_MAX_VALUE 0x10ffff
00062 
/**
 * Character category codes.
 *
 * Values 0..29 are assigned explicitly. U_UNASSIGNED and
 * U_GENERAL_OTHER_TYPES share the value 0. U_CHAR_CATEGORY_COUNT has no
 * initializer, so it takes the value following U_FINAL_PUNCTUATION (30).
 *
 * NOTE(review): the two-letter tags in the trailing comments are the Unicode
 * General_Category abbreviations that these names appear to correspond to.
 * They come from the Unicode standard, not from this file -- confirm against
 * the ICU API reference.
 * NOTE(review): u_charType(), declared later in this file, returns int8_t
 * rather than this enum; presumably its result is one of these values -- confirm.
 */
00068 enum UCharCategory
00069 {
00071     U_UNASSIGNED              = 0,   /* Cn */
00073     U_GENERAL_OTHER_TYPES     = 0,   /* second name for value 0 (same as U_UNASSIGNED) */
00075     U_UPPERCASE_LETTER        = 1,   /* Lu */
00077     U_LOWERCASE_LETTER        = 2,   /* Ll */
00079     U_TITLECASE_LETTER        = 3,   /* Lt */
00081     U_MODIFIER_LETTER         = 4,   /* Lm */
00083     U_OTHER_LETTER            = 5,   /* Lo */
00085     U_NON_SPACING_MARK        = 6,   /* Mn */
00087     U_ENCLOSING_MARK          = 7,   /* Me */
00089     U_COMBINING_SPACING_MARK  = 8,   /* Mc */
00091     U_DECIMAL_DIGIT_NUMBER    = 9,   /* Nd */
00093     U_LETTER_NUMBER           = 10,  /* Nl */
00095     U_OTHER_NUMBER            = 11,  /* No */
00097     U_SPACE_SEPARATOR         = 12,  /* Zs */
00099     U_LINE_SEPARATOR          = 13,  /* Zl */
00101     U_PARAGRAPH_SEPARATOR     = 14,  /* Zp */
00103     U_CONTROL_CHAR            = 15,  /* Cc */
00105     U_FORMAT_CHAR             = 16,  /* Cf */
00107     U_PRIVATE_USE_CHAR        = 17,  /* Co */
00109     U_SURROGATE               = 18,  /* Cs */
00111     U_DASH_PUNCTUATION        = 19,  /* Pd */
00113     U_START_PUNCTUATION       = 20,  /* Ps */
00115     U_END_PUNCTUATION         = 21,  /* Pe */
00117     U_CONNECTOR_PUNCTUATION   = 22,  /* Pc */
00119     U_OTHER_PUNCTUATION       = 23,  /* Po */
00121     U_MATH_SYMBOL             = 24,  /* Sm */
00123     U_CURRENCY_SYMBOL         = 25,  /* Sc */
00125     U_MODIFIER_SYMBOL         = 26,  /* Sk */
00127     U_OTHER_SYMBOL            = 27,  /* So */
00129     U_INITIAL_PUNCTUATION     = 28,  /* Pi */
00131     U_FINAL_PUNCTUATION       = 29,  /* Pf */
00133     U_CHAR_CATEGORY_COUNT            /* implicit value 30: number of distinct category values */
00134 };
00135 
/* Allows the type to be written as plain "UCharCategory" in C, without the enum keyword. */
00136 typedef enum UCharCategory UCharCategory;
00137 
/**
 * Character direction codes; this is the return type of u_charDirection(),
 * declared later in this file.
 *
 * Values 0..18 are assigned explicitly. U_CHAR_DIRECTION_COUNT has no
 * initializer, so it takes the value following U_BOUNDARY_NEUTRAL (19).
 *
 * NOTE(review): the tags in the trailing comments are the Unicode Bidi_Class
 * abbreviations that these names appear to correspond to. They come from the
 * Unicode standard, not from this file -- confirm against the ICU API reference.
 */
00142 enum UCharDirection   { 
00144     U_LEFT_TO_RIGHT               = 0,   /* L */
00146     U_RIGHT_TO_LEFT               = 1,   /* R */
00148     U_EUROPEAN_NUMBER             = 2,   /* EN */
00150     U_EUROPEAN_NUMBER_SEPARATOR   = 3,   /* ES */
00152     U_EUROPEAN_NUMBER_TERMINATOR  = 4,   /* ET */
00154     U_ARABIC_NUMBER               = 5,   /* AN */
00156     U_COMMON_NUMBER_SEPARATOR     = 6,   /* CS */
00158     U_BLOCK_SEPARATOR             = 7,   /* B */
00160     U_SEGMENT_SEPARATOR           = 8,   /* S */
00162     U_WHITE_SPACE_NEUTRAL         = 9,   /* WS */
00164     U_OTHER_NEUTRAL               = 10,  /* ON */
00166     U_LEFT_TO_RIGHT_EMBEDDING     = 11,  /* LRE */
00168     U_LEFT_TO_RIGHT_OVERRIDE      = 12,  /* LRO */
00170     U_RIGHT_TO_LEFT_ARABIC        = 13,  /* AL */
00172     U_RIGHT_TO_LEFT_EMBEDDING     = 14,  /* RLE */
00174     U_RIGHT_TO_LEFT_OVERRIDE      = 15,  /* RLO */
00176     U_POP_DIRECTIONAL_FORMAT      = 16,  /* PDF */
00178     U_DIR_NON_SPACING_MARK        = 17,  /* NSM */
00180     U_BOUNDARY_NEUTRAL            = 18,  /* BN */
00182     U_CHAR_DIRECTION_COUNT               /* implicit value 19: number of direction values */
00183 };
00184 
/* Allows the type to be written as plain "UCharDirection" in C, without the enum keyword. */
00185 typedef enum UCharDirection UCharDirection;
00186 
/**
 * Block identifiers; this is the return type of ublock_getCode(), declared
 * later in this file.
 *
 * Layout of the values as written below:
 *   - Numbering starts at 1; no enumerator has the value 0.
 *   - For values 1..87 every UBLOCK_xxx constant is immediately followed by a
 *     U_xxx constant with the same value.
 *   - Values 88..97 exist only under a UBLOCK_xxx name.
 *   - UBLOCK_COUNT and U_SCRIPT_COUNT are both 98, one more than the highest
 *     block value; U_CHAR_SCRIPT_COUNT and U_NO_SCRIPT are defined as
 *     UBLOCK_COUNT and therefore also 98.
 *   - UBLOCK_INVALID_CODE is -1.
 *
 * NOTE(review): the U_xxx duplicates and the *_SCRIPT_* names look like an
 * older "script" naming scheme kept for source compatibility (the end of this
 * file also maps u_charScript to ublock_getCode and UCharScript to
 * UBlockCode) -- confirm their deprecation status in the ICU API reference.
 * NOTE(review): the names appear to follow the Unicode block names -- confirm.
 */
00192 enum UBlockCode {
00194     UBLOCK_BASIC_LATIN = 1,
00196     U_BASIC_LATIN = 1,
00197 
00199     UBLOCK_LATIN_1_SUPPLEMENT=2,
00201     U_LATIN_1_SUPPLEMENT=2,
00202 
00204     UBLOCK_LATIN_EXTENDED_A =3,
00206     U_LATIN_EXTENDED_A=3,
00207 
00209     UBLOCK_LATIN_EXTENDED_B =4,
00211     U_LATIN_EXTENDED_B=4,
00212 
00214     UBLOCK_IPA_EXTENSIONS =5,
00216     U_IPA_EXTENSIONS=5,
00217     
00219     UBLOCK_SPACING_MODIFIER_LETTERS =6,
00221     U_SPACING_MODIFIER_LETTERS=6,
00222 
00224     UBLOCK_COMBINING_DIACRITICAL_MARKS =7,
00226     U_COMBINING_DIACRITICAL_MARKS=7,
00227     
00229     UBLOCK_GREEK =8,
00231     U_GREEK=8,
00232 
00234     UBLOCK_CYRILLIC =9,
00236     U_CYRILLIC=9,
00237 
00239     UBLOCK_ARMENIAN =10,
00241     U_ARMENIAN=10,
00242 
00244     UBLOCK_HEBREW =11,
00246     U_HEBREW=11,
00247 
00249     UBLOCK_ARABIC =12,
00251     U_ARABIC=12,
00252 
00254     UBLOCK_SYRIAC =13,
00256     U_SYRIAC=13,
00257 
00259     UBLOCK_THAANA =14,
00261     U_THAANA=14,
00262 
00264     UBLOCK_DEVANAGARI =15,
00266     U_DEVANAGARI=15,
00267 
00269     UBLOCK_BENGALI =16,
00271     U_BENGALI=16,
00272 
00274     UBLOCK_GURMUKHI =17,
00276     U_GURMUKHI=17,
00277 
00279     UBLOCK_GUJARATI =18,
00281     U_GUJARATI=18,
00282 
00284     UBLOCK_ORIYA =19,
00286     U_ORIYA=19,
00287 
00289     UBLOCK_TAMIL =20,
00291     U_TAMIL=20,
00292 
00294     UBLOCK_TELUGU =21,
00296     U_TELUGU=21,
00297 
00299     UBLOCK_KANNADA =22,
00301     U_KANNADA=22,
00302 
00304     UBLOCK_MALAYALAM =23,
00306     U_MALAYALAM=23,
00307 
00309     UBLOCK_SINHALA =24,
00311     U_SINHALA=24,
00312 
00314     UBLOCK_THAI =25,
00316     U_THAI=25,
00317 
00319     UBLOCK_LAO =26,
00321     U_LAO=26,
00322 
00324     UBLOCK_TIBETAN =27,
00326     U_TIBETAN=27,
00327 
00329     UBLOCK_MYANMAR =28,
00331     U_MYANMAR=28,
00332 
00334     UBLOCK_GEORGIAN =29,
00336     U_GEORGIAN=29,
00337 
00339     UBLOCK_HANGUL_JAMO =30,
00341     U_HANGUL_JAMO=30,
00342 
00344     UBLOCK_ETHIOPIC =31,
00346     U_ETHIOPIC=31,
00347 
00349     UBLOCK_CHEROKEE =32,
00351     U_CHEROKEE=32,
00352 
00354     UBLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS =33,
00356     U_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS=33,
00357 
00359     UBLOCK_OGHAM =34,
00361     U_OGHAM=34,
00362 
00364     UBLOCK_RUNIC =35,
00366     U_RUNIC=35,
00367 
00369     UBLOCK_KHMER =36,
00371     U_KHMER=36,
00372 
00374     UBLOCK_MONGOLIAN =37,
00376     U_MONGOLIAN=37,
00377 
00379     UBLOCK_LATIN_EXTENDED_ADDITIONAL =38,
00381     U_LATIN_EXTENDED_ADDITIONAL=38,
00382 
00384     UBLOCK_GREEK_EXTENDED =39,
00386     U_GREEK_EXTENDED=39,
00387 
00389     UBLOCK_GENERAL_PUNCTUATION =40,
00391     U_GENERAL_PUNCTUATION=40,
00392 
00394     UBLOCK_SUPERSCRIPTS_AND_SUBSCRIPTS =41,
00396     U_SUPERSCRIPTS_AND_SUBSCRIPTS=41,
00397     
00399     UBLOCK_CURRENCY_SYMBOLS =42,
00401     U_CURRENCY_SYMBOLS=42,
00402     
00404     UBLOCK_COMBINING_MARKS_FOR_SYMBOLS =43,
00406     U_COMBINING_MARKS_FOR_SYMBOLS=43,
00407     
00409     UBLOCK_LETTERLIKE_SYMBOLS =44,
00411     U_LETTERLIKE_SYMBOLS=44,
00412     
00414     UBLOCK_NUMBER_FORMS =45,
00416     U_NUMBER_FORMS=45,
00417 
00419     UBLOCK_ARROWS =46,
00421     U_ARROWS=46,
00422 
00424     UBLOCK_MATHEMATICAL_OPERATORS =47,
00426     U_MATHEMATICAL_OPERATORS=47,
00427 
00429     UBLOCK_MISCELLANEOUS_TECHNICAL =48,
00431     U_MISCELLANEOUS_TECHNICAL=48,
00432 
00434     UBLOCK_CONTROL_PICTURES =49,
00436     U_CONTROL_PICTURES=49,
00437 
00439     UBLOCK_OPTICAL_CHARACTER_RECOGNITION =50,
00441     U_OPTICAL_CHARACTER_RECOGNITION=50,
00442 
00444     UBLOCK_ENCLOSED_ALPHANUMERICS =51,
00446     U_ENCLOSED_ALPHANUMERICS=51,
00447 
00449     UBLOCK_BOX_DRAWING =52,
00451     U_BOX_DRAWING=52,
00452 
00454     UBLOCK_BLOCK_ELEMENTS =53,
00456     U_BLOCK_ELEMENTS=53,
00457 
00459     UBLOCK_GEOMETRIC_SHAPES =54,
00461     U_GEOMETRIC_SHAPES=54,
00462 
00464     UBLOCK_MISCELLANEOUS_SYMBOLS =55,
00466     U_MISCELLANEOUS_SYMBOLS=55,
00467 
00469     UBLOCK_DINGBATS =56,
00471     U_DINGBATS=56,
00472 
00474     UBLOCK_BRAILLE_PATTERNS =57,
00476     U_BRAILLE_PATTERNS=57,
00477 
00479     UBLOCK_CJK_RADICALS_SUPPLEMENT =58,
00481     U_CJK_RADICALS_SUPPLEMENT=58,
00482 
00484     UBLOCK_KANGXI_RADICALS =59,
00486     U_KANGXI_RADICALS=59,
00487 
00489     UBLOCK_IDEOGRAPHIC_DESCRIPTION_CHARACTERS =60,
00491     U_IDEOGRAPHIC_DESCRIPTION_CHARACTERS=60,
00492 
00494     UBLOCK_CJK_SYMBOLS_AND_PUNCTUATION =61,
00496     U_CJK_SYMBOLS_AND_PUNCTUATION=61,
00497 
00499     UBLOCK_HIRAGANA =62,
00501     U_HIRAGANA=62,
00502 
00504     UBLOCK_KATAKANA =63,
00506     U_KATAKANA=63,
00507 
00509     UBLOCK_BOPOMOFO =64,
00511     U_BOPOMOFO=64,
00512 
00514     UBLOCK_HANGUL_COMPATIBILITY_JAMO =65,
00516     U_HANGUL_COMPATIBILITY_JAMO=65,
00517 
00519     UBLOCK_KANBUN =66,
00521     U_KANBUN=66,
00522 
00524     UBLOCK_BOPOMOFO_EXTENDED =67,
00526     U_BOPOMOFO_EXTENDED=67,
00527 
00529     UBLOCK_ENCLOSED_CJK_LETTERS_AND_MONTHS =68,
00531     U_ENCLOSED_CJK_LETTERS_AND_MONTHS=68,
00532 
00534     UBLOCK_CJK_COMPATIBILITY =69,
00536     U_CJK_COMPATIBILITY=69,
00537 
00539     UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A =70,
00541     U_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A=70,
00542 
00544     UBLOCK_CJK_UNIFIED_IDEOGRAPHS =71,
00546     U_CJK_UNIFIED_IDEOGRAPHS=71,
00547 
00549     UBLOCK_YI_SYLLABLES =72,
00551     U_YI_SYLLABLES=72,
00552 
00554     UBLOCK_YI_RADICALS =73,
00556     U_YI_RADICALS=73,
00557 
00559     UBLOCK_HANGUL_SYLLABLES =74,
00561     U_HANGUL_SYLLABLES=74,
00562 
00564     UBLOCK_HIGH_SURROGATES =75,
00566     U_HIGH_SURROGATES=75,
00567 
00569     UBLOCK_HIGH_PRIVATE_USE_SURROGATES =76,
00571     U_HIGH_PRIVATE_USE_SURROGATES=76,
00572 
00574     UBLOCK_LOW_SURROGATES =77,
00576     U_LOW_SURROGATES=77,
00577 
00579     UBLOCK_PRIVATE_USE_AREA =78,
00581     U_PRIVATE_USE_AREA=78,
00582 
00584     UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS =79,
00586     U_CJK_COMPATIBILITY_IDEOGRAPHS=79,
00587 
00589     UBLOCK_ALPHABETIC_PRESENTATION_FORMS =80,
00591     U_ALPHABETIC_PRESENTATION_FORMS=80,
00592 
00594     UBLOCK_ARABIC_PRESENTATION_FORMS_A =81,
00596     U_ARABIC_PRESENTATION_FORMS_A=81,
00597 
00599     UBLOCK_COMBINING_HALF_MARKS =82,
00601     U_COMBINING_HALF_MARKS=82,
00602 
00604     UBLOCK_CJK_COMPATIBILITY_FORMS =83,
00606     U_CJK_COMPATIBILITY_FORMS=83,
00607 
00609     UBLOCK_SMALL_FORM_VARIANTS =84,
00611     U_SMALL_FORM_VARIANTS=84,
00612 
00614     UBLOCK_ARABIC_PRESENTATION_FORMS_B =85,
00616     U_ARABIC_PRESENTATION_FORMS_B=85,
00617 
00619     UBLOCK_SPECIALS =86,
00621     U_SPECIALS=86,
00622 
00624     UBLOCK_HALFWIDTH_AND_FULLWIDTH_FORMS =87,
00626     U_HALFWIDTH_AND_FULLWIDTH_FORMS=87,
00627     
      /* From here on the constants have no U_xxx duplicate. */
00629     UBLOCK_OLD_ITALIC = 88  ,
00631     UBLOCK_GOTHIC = 89 ,
00633     UBLOCK_DESERET = 90 ,
00635     UBLOCK_BYZANTINE_MUSICAL_SYMBOLS = 91 ,
00637     UBLOCK_MUSICAL_SYMBOLS = 92 ,
00639     UBLOCK_MATHEMATICAL_ALPHANUMERIC_SYMBOLS = 93  ,
00641     UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B  = 94 ,
00643     UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT = 95 ,
00645     UBLOCK_TAGS = 96 ,
00647     UBLOCK_PRIVATE_USE = 97 ,
      /* One more than the highest block value above (97); must be kept in step by hand. */
00649     UBLOCK_COUNT=98,
      /* Hard-coded to the same number as UBLOCK_COUNT rather than defined in terms of it. */
00651     U_SCRIPT_COUNT=98,
00652 
      /* The only negative enumerator. NOTE(review): presumably returned for an invalid input -- confirm. */
00654     UBLOCK_INVALID_CODE=-1,
00655 
      /* Both of the following evaluate to UBLOCK_COUNT (98). */
00657     U_CHAR_SCRIPT_COUNT =UBLOCK_COUNT,
00659     U_NO_SCRIPT = UBLOCK_COUNT
00660 
00661 };
00662 
/* Allows the type to be written as plain "UBlockCode" in C, without the enum keyword. */
00664 typedef enum UBlockCode UBlockCode;
00665 
/**
 * Cell width codes.
 *
 * Values 0..3 are assigned explicitly. U_CELL_WIDTH_COUNT has no initializer,
 * so it takes the value following U_NEUTRAL_WIDTH (4).
 *
 * NOTE(review): presumably these are the values produced by
 * u_charCellWidth(), declared later in this file, although that prototype
 * returns uint16_t rather than this enum -- confirm.
 */
00670 enum UCellWidth
00671 {
00673     U_ZERO_WIDTH              = 0,
00675     U_HALF_WIDTH              = 1,
00677     U_FULL_WIDTH              = 2,
00679     U_NEUTRAL_WIDTH           = 3,
00681     U_CELL_WIDTH_COUNT               /* implicit value 4: number of width values */
00682 };
00683 
/* Allows the type to be written as plain "UCellWidth" in C, without the enum keyword. */
00685 typedef enum UCellWidth UCellWidth;
00686 
/**
 * Selector for which kind of character name is meant. It is taken as the
 * nameChoice parameter by u_charName(), u_charFromName(), u_enumCharNames()
 * and the UEnumCharNamesFn callback type, all declared later in this file.
 *
 * No enumerator has an initializer, so the values are 0, 1 and 2 in order.
 *
 * NOTE(review): judging by the names, U_UNICODE_CHAR_NAME selects the current
 * Unicode character name and U_UNICODE_10_CHAR_NAME the Unicode 1.0 name --
 * confirm against the ICU API reference.
 */
00697 enum UCharNameChoice {
00698     U_UNICODE_CHAR_NAME,           /* 0 */
00699     U_UNICODE_10_CHAR_NAME,        /* 1 */
00700     U_CHAR_NAME_CHOICE_COUNT       /* 2: number of name choices */
00701 };
00702 
/* Allows the type to be written as plain "UCharNameChoice" in C, without the enum keyword. */
00704 typedef enum UCharNameChoice UCharNameChoice;
00705 
/**
 * Single-character classification predicates.
 *
 * Every function in this group takes one UChar32 argument and returns UBool.
 * U_CAPI and U_EXPORT2 are not defined in this file; presumably they are
 * linkage/export macros supplied through "unicode/utypes.h" -- TODO confirm.
 *
 * NOTE(review): the one-line summaries below are inferred from the function
 * names alone (this listing carries no doc comments for them). Verify each
 * against the ICU API reference before relying on it.
 */
/* u_islower: presumably tests whether c is a lowercase letter. */
00718 U_CAPI UBool U_EXPORT2
00719 u_islower(UChar32 c);
00720 
/* u_isupper: presumably tests whether c is an uppercase letter. */
00732 U_CAPI UBool U_EXPORT2
00733 u_isupper(UChar32 c);
00734 
/* u_istitle: presumably tests whether c is a titlecase letter. */
00746 U_CAPI UBool U_EXPORT2
00747 u_istitle(UChar32 c);
00748 
/* u_isdigit: presumably tests whether c is a digit. */
00756 U_CAPI UBool U_EXPORT2
00757 u_isdigit(UChar32 c);
00758 
/* u_isalnum: presumably tests whether c is a letter or a digit (added 11/11/1999 per the file history). */
00767 U_CAPI UBool U_EXPORT2
00768 u_isalnum(UChar32 c);
00769 
/* u_isdefined: presumably tests whether c is an assigned character. */
00785 U_CAPI UBool U_EXPORT2
00786 u_isdefined(UChar32 c);
00787 
/* u_isalpha: presumably tests whether c is a letter. */
00799 U_CAPI UBool U_EXPORT2
00800 u_isalpha(UChar32 c);
00801 
/* u_isspace: presumably tests whether c is a space character. */
00809 U_CAPI UBool U_EXPORT2
00810 u_isspace(UChar32 c);
00811 
/*
 * u_isWhitespace: presumably a whitespace test with a different definition
 * from u_isspace; the difference is not visible in this file -- confirm.
 */
00840 U_CAPI UBool U_EXPORT2
00841 u_isWhitespace(UChar32 c);
00842 
/* u_iscntrl: presumably tests whether c is a control character. */
00858 U_CAPI UBool U_EXPORT2
00859 u_iscntrl(UChar32 c);
00860 
00861 
/* u_isprint: presumably tests whether c is a printable character. */
00872 U_CAPI UBool U_EXPORT2
00873 u_isprint(UChar32 c);
00874 
/* u_isbase: presumably tests whether c is a base character (as opposed to a combining mark). */
00886 U_CAPI UBool U_EXPORT2
00887 u_isbase(UChar32 c);
00888 
/**
 * Per-character property getters. Each takes one UChar32 argument; the return
 * types differ and are noted per function.
 *
 * NOTE(review): what each function computes is inferred from its name and
 * return type only -- verify against the ICU API reference.
 */
/* u_charDirection: returns a UCharDirection for c. */
00898 U_CAPI UCharDirection U_EXPORT2
00899 u_charDirection(UChar32 c);
00900 
/* u_isMirrored: returns UBool; presumably whether c has the bidi "mirrored" property. */
00911 U_CAPI UBool U_EXPORT2
00912 u_isMirrored(UChar32 c);
00913 
/* u_charMirror: returns a UChar32; presumably the mirror-image counterpart of c. */
00930 U_CAPI UChar32 U_EXPORT2
00931 u_charMirror(UChar32 c);
00932 
/* u_charCellWidth: returns uint16_t (not UCellWidth); presumably one of the UCellWidth values. */
00984 U_CAPI uint16_t U_EXPORT2
00985 u_charCellWidth(UChar32 c);
00986 
/* u_charType: returns int8_t (not UCharCategory); presumably one of the UCharCategory values. */
00995 U_CAPI int8_t U_EXPORT2
00996 u_charType(UChar32 c);
00997 
/* u_getCombiningClass: returns uint8_t; presumably the canonical combining class of c. */
01005 U_CAPI uint8_t U_EXPORT2
01006 u_getCombiningClass(UChar32 c);
01007 
/* u_charDigitValue: returns int32_t; presumably the numeric value of c when it is a digit. */
01016 U_CAPI int32_t U_EXPORT2
01017 u_charDigitValue(UChar32 c);
01018 
/* ublock_getCode: returns a UBlockCode for ch. */
01025 U_CAPI UBlockCode U_EXPORT2
01026 ublock_getCode(UChar32    ch);
01027 
/**
 * Character-name API: look up a name from a code point, look up a code point
 * from a name, and enumerate names over a range through a callback.
 *
 * All three functions take a UCharNameChoice and report status through a
 * UErrorCode pointer.
 *
 * NOTE(review): the behavioural details below (what is written where, what
 * the return values mean, whether `limit` is exclusive) are inferred from
 * parameter names and types only -- verify against the ICU API reference.
 */
/*
 * u_charName: presumably writes the name of `code` selected by `nameChoice`
 * into `buffer` (capacity `bufferLength`) and returns the name length as a
 * UTextOffset.
 */
01060 U_CAPI UTextOffset U_EXPORT2
01061 u_charName(UChar32 code, UCharNameChoice nameChoice,
01062            char *buffer, UTextOffset bufferLength,
01063            UErrorCode *pErrorCode);
01064 
/* u_charFromName: presumably the inverse lookup, returning the code point whose name is `name`. */
01081 U_CAPI UChar32 U_EXPORT2
01082 u_charFromName(UCharNameChoice nameChoice,
01083                const char *name,
01084                UErrorCode *pErrorCode);
01085 
/*
 * U_CDECL_BEGIN / U_CDECL_END are defined outside this file; presumably they
 * give the enclosed declaration C linkage when compiled as C++ -- confirm.
 */
01086 U_CDECL_BEGIN
01087 
/*
 * UEnumCharNamesFn: a function type (not a pointer type) for the callback
 * that u_enumCharNames() takes as `UEnumCharNamesFn *fn`. The callback
 * receives the caller's `context`, a code point, the name choice, and a name
 * with its length, and returns UBool.
 * NOTE(review): presumably a false return stops the enumeration -- confirm.
 */
01104 typedef UBool UEnumCharNamesFn(void *context,
01105                                UChar32 code,
01106                                UCharNameChoice nameChoice,
01107                                const char *name,
01108                                UTextOffset length);
01109 
01110 U_CDECL_END
01111 
/*
 * u_enumCharNames: presumably calls `fn` with `context` for the named code
 * points from `start` up to `limit`; returns nothing.
 */
01132 U_CAPI void U_EXPORT2
01133 u_enumCharNames(UChar32 start, UChar32 limit,
01134                 UEnumCharNamesFn *fn,
01135                 void *context,
01136                 UCharNameChoice nameChoice,
01137                 UErrorCode *pErrorCode);
01138 
/**
 * Identifier-related predicates. Each takes one UChar32 argument and returns
 * UBool.
 *
 * NOTE(review): the summaries are inferred from the function names only; the
 * exact character sets are not visible in this file -- verify against the ICU
 * API reference.
 */
/* u_isIDStart: presumably tests whether c may start an identifier. */
01155 U_CAPI UBool U_EXPORT2
01156 u_isIDStart(UChar32 c);
01157 
/* u_isIDPart: presumably tests whether c may appear inside an identifier. */
01182 U_CAPI UBool U_EXPORT2
01183 u_isIDPart(UChar32 c);
01184 
/* u_isIDIgnorable: presumably tests whether c is ignorable within an identifier. */
01209 U_CAPI UBool U_EXPORT2
01210 u_isIDIgnorable(UChar32 c);
01211 
/* u_isJavaIDStart: presumably tests whether c may start a Java identifier. */
01232 U_CAPI UBool U_EXPORT2
01233 u_isJavaIDStart(UChar32 c);
01234 
/* u_isJavaIDPart: presumably tests whether c may appear inside a Java identifier. */
01263 U_CAPI UBool U_EXPORT2
01264 u_isJavaIDPart(UChar32 c);
01265 
/**
 * Single-character case mapping, case folding and digit conversion.
 *
 * NOTE(review): the summaries are inferred from names and signatures only
 * (for example what is returned when no mapping exists, or for an invalid
 * radix, is not visible here) -- verify against the ICU API reference.
 */
/* u_tolower: takes and returns a UChar32; presumably the lowercase mapping of c. */
01288 U_CAPI UChar32 U_EXPORT2
01289 u_tolower(UChar32 c);
01290 
/* u_toupper: takes and returns a UChar32; presumably the uppercase mapping of c. */
01306 U_CAPI UChar32 U_EXPORT2
01307 u_toupper(UChar32 c);
01308 
/* u_totitle: takes and returns a UChar32; presumably the titlecase mapping of c. */
01323 U_CAPI UChar32 U_EXPORT2
01324 u_totitle(UChar32 c);
01325 
/* Constants 0 and 1. Presumably the accepted values for the `options` argument of u_foldCase() below. */
01327 #define U_FOLD_CASE_DEFAULT 0
01328 
01329 #define U_FOLD_CASE_EXCLUDE_SPECIAL_I 1
01330 
/* u_foldCase: takes a UChar32 and a uint32_t options word; presumably returns the case-folded form of c. */
01344 U_CAPI UChar32 U_EXPORT2
01345 u_foldCase(UChar32 c, uint32_t options);
01346 
/* u_digit: returns int32_t; presumably the value of ch as a digit in the given radix. */
01382 U_CAPI int32_t U_EXPORT2
01383 u_digit(UChar32 ch, int8_t radix);
01384 
/* u_forDigit: returns a UChar32; presumably the character representing `digit` in the given radix. */
01411 U_CAPI UChar32 U_EXPORT2
01412 u_forDigit(int32_t digit, int8_t radix);
01413 
/*
 * u_getUnicodeVersion: takes a UVersionInfo and returns nothing. The file
 * history records that it was renamed from u_getVersion on 01/11/2000.
 * NOTE(review): presumably fills `info` with the Unicode version that
 * U_UNICODE_VERSION spells as a string; UVersionInfo is declared outside
 * this file -- confirm.
 */
01421 U_CAPI void U_EXPORT2
01422 u_getUnicodeVersion(UVersionInfo info);
01423 
01424 
/*
 * Source-level aliases: the macro makes every use of the name u_charScript
 * expand to ublock_getCode, and the typedef makes UCharScript another name
 * for UBlockCode.
 * NOTE(review): these look like compatibility names for an older "script"
 * API -- confirm their deprecation status.
 */
01428 #define u_charScript ublock_getCode
01429 
01430 typedef UBlockCode UCharScript;
01431 
01432 #endif /* UCHAR_H */
01433 /*eof*/

Generated on Mon Dec 3 19:00:24 2001 for ICU 2.0 by doxygen 1.2.11.1, written by Dimitri van Heesch, © 1997-2001