00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023 #ifndef UCHAR_H
00024 #define UCHAR_H
00025
00026 #include "unicode/utypes.h"
00027
00028
00029
/**
 * Unicode version string associated with this header.
 * The value is a string literal, not a numeric version.
 */
#define U_UNICODE_VERSION "3.1.1"

/** Smallest value in the character value range described by this header. */
#define UCHAR_MIN_VALUE 0

/** Largest value in the character value range described by this header. */
#define UCHAR_MAX_VALUE 0x10ffff
00062
/**
 * Character category codes.
 *
 * Codes are assigned explicitly from 0 through 29. U_CHAR_CATEGORY_COUNT is
 * left implicit, so it takes the next value (30) and equals the number of
 * distinct category codes.
 */
enum UCharCategory
{
    /* Value 0 has two names. */
    U_UNASSIGNED = 0,
    /** Alias of U_UNASSIGNED (same value, 0). */
    U_GENERAL_OTHER_TYPES = 0,

    /* Letters */
    U_UPPERCASE_LETTER = 1,
    U_LOWERCASE_LETTER = 2,
    U_TITLECASE_LETTER = 3,
    U_MODIFIER_LETTER = 4,
    U_OTHER_LETTER = 5,

    /* Marks */
    U_NON_SPACING_MARK = 6,
    U_ENCLOSING_MARK = 7,
    U_COMBINING_SPACING_MARK = 8,

    /* Numbers */
    U_DECIMAL_DIGIT_NUMBER = 9,
    U_LETTER_NUMBER = 10,
    U_OTHER_NUMBER = 11,

    /* Separators */
    U_SPACE_SEPARATOR = 12,
    U_LINE_SEPARATOR = 13,
    U_PARAGRAPH_SEPARATOR = 14,

    /* Control, format, private use, surrogate */
    U_CONTROL_CHAR = 15,
    U_FORMAT_CHAR = 16,
    U_PRIVATE_USE_CHAR = 17,
    U_SURROGATE = 18,

    /* Punctuation (initial/final punctuation are listed after the symbols) */
    U_DASH_PUNCTUATION = 19,
    U_START_PUNCTUATION = 20,
    U_END_PUNCTUATION = 21,
    U_CONNECTOR_PUNCTUATION = 22,
    U_OTHER_PUNCTUATION = 23,

    /* Symbols */
    U_MATH_SYMBOL = 24,
    U_CURRENCY_SYMBOL = 25,
    U_MODIFIER_SYMBOL = 26,
    U_OTHER_SYMBOL = 27,

    U_INITIAL_PUNCTUATION = 28,
    U_FINAL_PUNCTUATION = 29,

    /** One past the highest category code (30); not itself a category. */
    U_CHAR_CATEGORY_COUNT
};

/** Lets the enum be written as plain `UCharCategory`, without the `enum` keyword. */
typedef enum UCharCategory UCharCategory;
00137
/**
 * Character direction codes.
 *
 * Codes are assigned explicitly from 0 through 18. U_CHAR_DIRECTION_COUNT is
 * left implicit, so it takes the next value (19) and equals the number of
 * direction codes. The constant names suggest the Unicode bidirectional
 * classes; confirm the exact mapping against the implementation.
 */
enum UCharDirection {
    U_LEFT_TO_RIGHT = 0,
    U_RIGHT_TO_LEFT = 1,
    U_EUROPEAN_NUMBER = 2,
    U_EUROPEAN_NUMBER_SEPARATOR = 3,
    U_EUROPEAN_NUMBER_TERMINATOR = 4,
    U_ARABIC_NUMBER = 5,
    U_COMMON_NUMBER_SEPARATOR = 6,
    U_BLOCK_SEPARATOR = 7,
    U_SEGMENT_SEPARATOR = 8,
    U_WHITE_SPACE_NEUTRAL = 9,
    U_OTHER_NEUTRAL = 10,
    U_LEFT_TO_RIGHT_EMBEDDING = 11,
    U_LEFT_TO_RIGHT_OVERRIDE = 12,
    U_RIGHT_TO_LEFT_ARABIC = 13,
    U_RIGHT_TO_LEFT_EMBEDDING = 14,
    U_RIGHT_TO_LEFT_OVERRIDE = 15,
    U_POP_DIRECTIONAL_FORMAT = 16,
    U_DIR_NON_SPACING_MARK = 17,
    U_BOUNDARY_NEUTRAL = 18,

    /** One past the highest direction code (19); not itself a direction. */
    U_CHAR_DIRECTION_COUNT
};

/** Lets the enum be written as plain `UCharDirection`, without the `enum` keyword. */
typedef enum UCharDirection UCharDirection;
00186
/**
 * Block codes.
 *
 * Layout of the enumeration:
 *   - Codes 1..87 each have two names with the same value: a UBLOCK_* name
 *     and a U_* name.
 *     NOTE(review): the U_* names look like an older spelling kept for
 *     compatibility; confirm before relying on either form being preferred.
 *   - Codes 88..97 have a UBLOCK_* name only.
 *   - UBLOCK_COUNT (98) is one past the highest block code; U_SCRIPT_COUNT,
 *     U_CHAR_SCRIPT_COUNT and U_NO_SCRIPT all carry that same value.
 *   - UBLOCK_INVALID_CODE is -1, so the enumeration contains a negative
 *     enumerator.
 *   - No enumerator has the value 0.
 */
enum UBlockCode {
    UBLOCK_BASIC_LATIN = 1,
    U_BASIC_LATIN = 1,

    UBLOCK_LATIN_1_SUPPLEMENT = 2,
    U_LATIN_1_SUPPLEMENT = 2,

    UBLOCK_LATIN_EXTENDED_A = 3,
    U_LATIN_EXTENDED_A = 3,

    UBLOCK_LATIN_EXTENDED_B = 4,
    U_LATIN_EXTENDED_B = 4,

    UBLOCK_IPA_EXTENSIONS = 5,
    U_IPA_EXTENSIONS = 5,

    UBLOCK_SPACING_MODIFIER_LETTERS = 6,
    U_SPACING_MODIFIER_LETTERS = 6,

    UBLOCK_COMBINING_DIACRITICAL_MARKS = 7,
    U_COMBINING_DIACRITICAL_MARKS = 7,

    UBLOCK_GREEK = 8,
    U_GREEK = 8,

    UBLOCK_CYRILLIC = 9,
    U_CYRILLIC = 9,

    UBLOCK_ARMENIAN = 10,
    U_ARMENIAN = 10,

    UBLOCK_HEBREW = 11,
    U_HEBREW = 11,

    UBLOCK_ARABIC = 12,
    U_ARABIC = 12,

    UBLOCK_SYRIAC = 13,
    U_SYRIAC = 13,

    UBLOCK_THAANA = 14,
    U_THAANA = 14,

    UBLOCK_DEVANAGARI = 15,
    U_DEVANAGARI = 15,

    UBLOCK_BENGALI = 16,
    U_BENGALI = 16,

    UBLOCK_GURMUKHI = 17,
    U_GURMUKHI = 17,

    UBLOCK_GUJARATI = 18,
    U_GUJARATI = 18,

    UBLOCK_ORIYA = 19,
    U_ORIYA = 19,

    UBLOCK_TAMIL = 20,
    U_TAMIL = 20,

    UBLOCK_TELUGU = 21,
    U_TELUGU = 21,

    UBLOCK_KANNADA = 22,
    U_KANNADA = 22,

    UBLOCK_MALAYALAM = 23,
    U_MALAYALAM = 23,

    UBLOCK_SINHALA = 24,
    U_SINHALA = 24,

    UBLOCK_THAI = 25,
    U_THAI = 25,

    UBLOCK_LAO = 26,
    U_LAO = 26,

    UBLOCK_TIBETAN = 27,
    U_TIBETAN = 27,

    UBLOCK_MYANMAR = 28,
    U_MYANMAR = 28,

    UBLOCK_GEORGIAN = 29,
    U_GEORGIAN = 29,

    UBLOCK_HANGUL_JAMO = 30,
    U_HANGUL_JAMO = 30,

    UBLOCK_ETHIOPIC = 31,
    U_ETHIOPIC = 31,

    UBLOCK_CHEROKEE = 32,
    U_CHEROKEE = 32,

    UBLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS = 33,
    U_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS = 33,

    UBLOCK_OGHAM = 34,
    U_OGHAM = 34,

    UBLOCK_RUNIC = 35,
    U_RUNIC = 35,

    UBLOCK_KHMER = 36,
    U_KHMER = 36,

    UBLOCK_MONGOLIAN = 37,
    U_MONGOLIAN = 37,

    UBLOCK_LATIN_EXTENDED_ADDITIONAL = 38,
    U_LATIN_EXTENDED_ADDITIONAL = 38,

    UBLOCK_GREEK_EXTENDED = 39,
    U_GREEK_EXTENDED = 39,

    UBLOCK_GENERAL_PUNCTUATION = 40,
    U_GENERAL_PUNCTUATION = 40,

    UBLOCK_SUPERSCRIPTS_AND_SUBSCRIPTS = 41,
    U_SUPERSCRIPTS_AND_SUBSCRIPTS = 41,

    UBLOCK_CURRENCY_SYMBOLS = 42,
    U_CURRENCY_SYMBOLS = 42,

    UBLOCK_COMBINING_MARKS_FOR_SYMBOLS = 43,
    U_COMBINING_MARKS_FOR_SYMBOLS = 43,

    UBLOCK_LETTERLIKE_SYMBOLS = 44,
    U_LETTERLIKE_SYMBOLS = 44,

    UBLOCK_NUMBER_FORMS = 45,
    U_NUMBER_FORMS = 45,

    UBLOCK_ARROWS = 46,
    U_ARROWS = 46,

    UBLOCK_MATHEMATICAL_OPERATORS = 47,
    U_MATHEMATICAL_OPERATORS = 47,

    UBLOCK_MISCELLANEOUS_TECHNICAL = 48,
    U_MISCELLANEOUS_TECHNICAL = 48,

    UBLOCK_CONTROL_PICTURES = 49,
    U_CONTROL_PICTURES = 49,

    UBLOCK_OPTICAL_CHARACTER_RECOGNITION = 50,
    U_OPTICAL_CHARACTER_RECOGNITION = 50,

    UBLOCK_ENCLOSED_ALPHANUMERICS = 51,
    U_ENCLOSED_ALPHANUMERICS = 51,

    UBLOCK_BOX_DRAWING = 52,
    U_BOX_DRAWING = 52,

    UBLOCK_BLOCK_ELEMENTS = 53,
    U_BLOCK_ELEMENTS = 53,

    UBLOCK_GEOMETRIC_SHAPES = 54,
    U_GEOMETRIC_SHAPES = 54,

    UBLOCK_MISCELLANEOUS_SYMBOLS = 55,
    U_MISCELLANEOUS_SYMBOLS = 55,

    UBLOCK_DINGBATS = 56,
    U_DINGBATS = 56,

    UBLOCK_BRAILLE_PATTERNS = 57,
    U_BRAILLE_PATTERNS = 57,

    UBLOCK_CJK_RADICALS_SUPPLEMENT = 58,
    U_CJK_RADICALS_SUPPLEMENT = 58,

    UBLOCK_KANGXI_RADICALS = 59,
    U_KANGXI_RADICALS = 59,

    UBLOCK_IDEOGRAPHIC_DESCRIPTION_CHARACTERS = 60,
    U_IDEOGRAPHIC_DESCRIPTION_CHARACTERS = 60,

    UBLOCK_CJK_SYMBOLS_AND_PUNCTUATION = 61,
    U_CJK_SYMBOLS_AND_PUNCTUATION = 61,

    UBLOCK_HIRAGANA = 62,
    U_HIRAGANA = 62,

    UBLOCK_KATAKANA = 63,
    U_KATAKANA = 63,

    UBLOCK_BOPOMOFO = 64,
    U_BOPOMOFO = 64,

    UBLOCK_HANGUL_COMPATIBILITY_JAMO = 65,
    U_HANGUL_COMPATIBILITY_JAMO = 65,

    UBLOCK_KANBUN = 66,
    U_KANBUN = 66,

    UBLOCK_BOPOMOFO_EXTENDED = 67,
    U_BOPOMOFO_EXTENDED = 67,

    UBLOCK_ENCLOSED_CJK_LETTERS_AND_MONTHS = 68,
    U_ENCLOSED_CJK_LETTERS_AND_MONTHS = 68,

    UBLOCK_CJK_COMPATIBILITY = 69,
    U_CJK_COMPATIBILITY = 69,

    UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A = 70,
    U_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A = 70,

    UBLOCK_CJK_UNIFIED_IDEOGRAPHS = 71,
    U_CJK_UNIFIED_IDEOGRAPHS = 71,

    UBLOCK_YI_SYLLABLES = 72,
    U_YI_SYLLABLES = 72,

    UBLOCK_YI_RADICALS = 73,
    U_YI_RADICALS = 73,

    UBLOCK_HANGUL_SYLLABLES = 74,
    U_HANGUL_SYLLABLES = 74,

    UBLOCK_HIGH_SURROGATES = 75,
    U_HIGH_SURROGATES = 75,

    UBLOCK_HIGH_PRIVATE_USE_SURROGATES = 76,
    U_HIGH_PRIVATE_USE_SURROGATES = 76,

    UBLOCK_LOW_SURROGATES = 77,
    U_LOW_SURROGATES = 77,

    UBLOCK_PRIVATE_USE_AREA = 78,
    U_PRIVATE_USE_AREA = 78,

    UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS = 79,
    U_CJK_COMPATIBILITY_IDEOGRAPHS = 79,

    UBLOCK_ALPHABETIC_PRESENTATION_FORMS = 80,
    U_ALPHABETIC_PRESENTATION_FORMS = 80,

    UBLOCK_ARABIC_PRESENTATION_FORMS_A = 81,
    U_ARABIC_PRESENTATION_FORMS_A = 81,

    UBLOCK_COMBINING_HALF_MARKS = 82,
    U_COMBINING_HALF_MARKS = 82,

    UBLOCK_CJK_COMPATIBILITY_FORMS = 83,
    U_CJK_COMPATIBILITY_FORMS = 83,

    UBLOCK_SMALL_FORM_VARIANTS = 84,
    U_SMALL_FORM_VARIANTS = 84,

    UBLOCK_ARABIC_PRESENTATION_FORMS_B = 85,
    U_ARABIC_PRESENTATION_FORMS_B = 85,

    UBLOCK_SPECIALS = 86,
    U_SPECIALS = 86,

    UBLOCK_HALFWIDTH_AND_FULLWIDTH_FORMS = 87,
    U_HALFWIDTH_AND_FULLWIDTH_FORMS = 87,

    /* From here on only the UBLOCK_* spelling exists. */
    UBLOCK_OLD_ITALIC = 88,
    UBLOCK_GOTHIC = 89,
    UBLOCK_DESERET = 90,
    UBLOCK_BYZANTINE_MUSICAL_SYMBOLS = 91,
    UBLOCK_MUSICAL_SYMBOLS = 92,
    UBLOCK_MATHEMATICAL_ALPHANUMERIC_SYMBOLS = 93,
    UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B = 94,
    UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT = 95,
    UBLOCK_TAGS = 96,
    UBLOCK_PRIVATE_USE = 97,

    /** One past the highest block code; not itself a block. */
    UBLOCK_COUNT = 98,
    /** Same value as UBLOCK_COUNT, written out literally. */
    U_SCRIPT_COUNT = 98,

    /** Negative marker for an invalid block code. */
    UBLOCK_INVALID_CODE = -1,

    /** Defined in terms of UBLOCK_COUNT, so it tracks that value. */
    U_CHAR_SCRIPT_COUNT = UBLOCK_COUNT,
    /** Defined in terms of UBLOCK_COUNT, so it tracks that value. */
    U_NO_SCRIPT = UBLOCK_COUNT
};

/** Lets the enum be written as plain `UBlockCode`, without the `enum` keyword. */
typedef enum UBlockCode UBlockCode;
00665
/**
 * Cell-width codes.
 *
 * Codes are assigned explicitly from 0 through 3. U_CELL_WIDTH_COUNT is left
 * implicit, so it takes the next value (4) and equals the number of width
 * codes.
 */
enum UCellWidth
{
    U_ZERO_WIDTH = 0,
    U_HALF_WIDTH = 1,
    U_FULL_WIDTH = 2,
    U_NEUTRAL_WIDTH = 3,

    /** One past the highest width code (4); not itself a width. */
    U_CELL_WIDTH_COUNT
};

/** Lets the enum be written as plain `UCellWidth`, without the `enum` keyword. */
typedef enum UCellWidth UCellWidth;
00686
/**
 * Selector for which kind of character name an API call refers to.
 *
 * No explicit values are given, so the enumerators are numbered 0, 1, 2 in
 * declaration order and U_CHAR_NAME_CHOICE_COUNT (2) equals the number of
 * selectable choices.
 */
enum UCharNameChoice {
    U_UNICODE_CHAR_NAME,
    U_UNICODE_10_CHAR_NAME,

    /** One past the last choice (2); not itself a valid choice. */
    U_CHAR_NAME_CHOICE_COUNT
};

/** Lets the enum be written as plain `UCharNameChoice`, without the `enum` keyword. */
typedef enum UCharNameChoice UCharNameChoice;
00705
00718 U_CAPI UBool U_EXPORT2
00719 u_islower(UChar32 c);
00720
00732 U_CAPI UBool U_EXPORT2
00733 u_isupper(UChar32 c);
00734
00746 U_CAPI UBool U_EXPORT2
00747 u_istitle(UChar32 c);
00748
00756 U_CAPI UBool U_EXPORT2
00757 u_isdigit(UChar32 c);
00758
00767 U_CAPI UBool U_EXPORT2
00768 u_isalnum(UChar32 c);
00769
00785 U_CAPI UBool U_EXPORT2
00786 u_isdefined(UChar32 c);
00787
00799 U_CAPI UBool U_EXPORT2
00800 u_isalpha(UChar32 c);
00801
00809 U_CAPI UBool U_EXPORT2
00810 u_isspace(UChar32 c);
00811
00840 U_CAPI UBool U_EXPORT2
00841 u_isWhitespace(UChar32 c);
00842
00858 U_CAPI UBool U_EXPORT2
00859 u_iscntrl(UChar32 c);
00860
00861
00872 U_CAPI UBool U_EXPORT2
00873 u_isprint(UChar32 c);
00874
00886 U_CAPI UBool U_EXPORT2
00887 u_isbase(UChar32 c);
00888
00898 U_CAPI UCharDirection U_EXPORT2
00899 u_charDirection(UChar32 c);
00900
00911 U_CAPI UBool U_EXPORT2
00912 u_isMirrored(UChar32 c);
00913
00930 U_CAPI UChar32 U_EXPORT2
00931 u_charMirror(UChar32 c);
00932
00984 U_CAPI uint16_t U_EXPORT2
00985 u_charCellWidth(UChar32 c);
00986
00995 U_CAPI int8_t U_EXPORT2
00996 u_charType(UChar32 c);
00997
01005 U_CAPI uint8_t U_EXPORT2
01006 u_getCombiningClass(UChar32 c);
01007
01016 U_CAPI int32_t U_EXPORT2
01017 u_charDigitValue(UChar32 c);
01018
01025 U_CAPI UBlockCode U_EXPORT2
01026 ublock_getCode(UChar32 ch);
01027
01060 U_CAPI UTextOffset U_EXPORT2
01061 u_charName(UChar32 code, UCharNameChoice nameChoice,
01062 char *buffer, UTextOffset bufferLength,
01063 UErrorCode *pErrorCode);
01064
01081 U_CAPI UChar32 U_EXPORT2
01082 u_charFromName(UCharNameChoice nameChoice,
01083 const char *name,
01084 UErrorCode *pErrorCode);
01085
01086 U_CDECL_BEGIN
01087
01104 typedef UBool UEnumCharNamesFn(void *context,
01105 UChar32 code,
01106 UCharNameChoice nameChoice,
01107 const char *name,
01108 UTextOffset length);
01109
01110 U_CDECL_END
01111
01132 U_CAPI void U_EXPORT2
01133 u_enumCharNames(UChar32 start, UChar32 limit,
01134 UEnumCharNamesFn *fn,
01135 void *context,
01136 UCharNameChoice nameChoice,
01137 UErrorCode *pErrorCode);
01138
01155 U_CAPI UBool U_EXPORT2
01156 u_isIDStart(UChar32 c);
01157
01182 U_CAPI UBool U_EXPORT2
01183 u_isIDPart(UChar32 c);
01184
01209 U_CAPI UBool U_EXPORT2
01210 u_isIDIgnorable(UChar32 c);
01211
01232 U_CAPI UBool U_EXPORT2
01233 u_isJavaIDStart(UChar32 c);
01234
01263 U_CAPI UBool U_EXPORT2
01264 u_isJavaIDPart(UChar32 c);
01265
01288 U_CAPI UChar32 U_EXPORT2
01289 u_tolower(UChar32 c);
01290
01306 U_CAPI UChar32 U_EXPORT2
01307 u_toupper(UChar32 c);
01308
01323 U_CAPI UChar32 U_EXPORT2
01324 u_totitle(UChar32 c);
01325
/**
 * Case-folding option values.
 * NOTE(review): presumably these are the values accepted by the `options`
 * parameter of u_foldCase() - confirm against the implementation.
 */
#define U_FOLD_CASE_DEFAULT 0

#define U_FOLD_CASE_EXCLUDE_SPECIAL_I 1
01330
01344 U_CAPI UChar32 U_EXPORT2
01345 u_foldCase(UChar32 c, uint32_t options);
01346
01382 U_CAPI int32_t U_EXPORT2
01383 u_digit(UChar32 ch, int8_t radix);
01384
01411 U_CAPI UChar32 U_EXPORT2
01412 u_forDigit(int32_t digit, int8_t radix);
01413
01421 U_CAPI void U_EXPORT2
01422 u_getUnicodeVersion(UVersionInfo info);
01423
01424
01428 #define u_charScript ublock_getCode
01429
01430 typedef UBlockCode UCharScript;
01431
01432 #endif
01433