00001 /* 00002 ********************************************************************** 00003 * Copyright (C) 1997-2000, International Business Machines 00004 * Corporation and others. All Rights Reserved. 00005 ********************************************************************** 00006 * 00007 * File UCHAR.H 00008 * 00009 * Modification History: 00010 * 00011 * Date Name Description 00012 * 04/02/97 aliu Creation. 00013 * 03/29/99 helena Updated for C APIs. 00014 * 4/15/99 Madhu Updated for C Implementation and Javadoc 00015 * 5/20/99 Madhu Added the function u_getVersion() 00016 * 8/19/1999 srl Upgraded scripts to Unicode 3.0 00017 * 8/27/1999 schererm UCharDirection constants: U_... 00018 * 11/11/1999 weiv added u_isalnum(), cleaned comments 00019 * 01/11/2000 helena Renamed u_getVersion to u_getUnicodeVersion(). 00020 ******************************************************************************** 00021 */ 00022 00023 #ifndef UCHAR_H 00024 #define UCHAR_H 00025 00026 #include "unicode/utypes.h" 00027 /*===========================================================================*/ 00028 /* Unicode version number */ 00029 /*===========================================================================*/ 00030 #define U_UNICODE_VERSION "3.0.0" 00031 00049 #define UCHAR_MIN_VALUE 0 00050 00056 #define UCHAR_MAX_VALUE 0x10ffff 00057 00062 enum UCharCategory 00063 { 00065 U_UNASSIGNED = 0, 00067 U_UPPERCASE_LETTER = 1, 00069 U_LOWERCASE_LETTER = 2, 00071 U_TITLECASE_LETTER = 3, 00073 U_MODIFIER_LETTER = 4, 00075 U_OTHER_LETTER = 5, 00077 U_NON_SPACING_MARK = 6, 00079 U_ENCLOSING_MARK = 7, 00081 U_COMBINING_SPACING_MARK = 8, 00083 U_DECIMAL_DIGIT_NUMBER = 9, 00085 U_LETTER_NUMBER = 10, 00087 U_OTHER_NUMBER = 11, 00089 U_SPACE_SEPARATOR = 12, 00091 U_LINE_SEPARATOR = 13, 00093 U_PARAGRAPH_SEPARATOR = 14, 00095 U_CONTROL_CHAR = 15, 00097 U_FORMAT_CHAR = 16, 00099 U_PRIVATE_USE_CHAR = 17, 00101 U_SURROGATE = 18, 00103 U_DASH_PUNCTUATION = 19, 00105 U_START_PUNCTUATION = 20, 00107 U_END_PUNCTUATION = 21, 00109 U_CONNECTOR_PUNCTUATION = 22, 00111 U_OTHER_PUNCTUATION = 23, 00113 U_MATH_SYMBOL = 24, 00115 U_CURRENCY_SYMBOL = 25, 00117 U_MODIFIER_SYMBOL = 26, 00119 U_OTHER_SYMBOL = 27, 00121 U_INITIAL_PUNCTUATION = 28, 00123 U_FINAL_PUNCTUATION = 29, 00125 U_GENERAL_OTHER_TYPES = 30, 00127 U_CHAR_CATEGORY_COUNT 00128 }; 00129 00130 typedef enum UCharCategory UCharCategory; 00134 enum UCharDirection { 00136 U_LEFT_TO_RIGHT = 0, 00138 U_RIGHT_TO_LEFT = 1, 00140 U_EUROPEAN_NUMBER = 2, 00142 U_EUROPEAN_NUMBER_SEPARATOR = 3, 00144 U_EUROPEAN_NUMBER_TERMINATOR = 4, 00146 U_ARABIC_NUMBER = 5, 00148 U_COMMON_NUMBER_SEPARATOR = 6, 00150 U_BLOCK_SEPARATOR = 7, 00152 U_SEGMENT_SEPARATOR = 8, 00154 U_WHITE_SPACE_NEUTRAL = 9, 00156 U_OTHER_NEUTRAL = 10, 00158 U_LEFT_TO_RIGHT_EMBEDDING = 11, 00160 U_LEFT_TO_RIGHT_OVERRIDE = 12, 00162 U_RIGHT_TO_LEFT_ARABIC = 13, 00164 U_RIGHT_TO_LEFT_EMBEDDING = 14, 00166 U_RIGHT_TO_LEFT_OVERRIDE = 15, 00168 U_POP_DIRECTIONAL_FORMAT = 16, 00170 U_DIR_NON_SPACING_MARK = 17, 00172 U_BOUNDARY_NEUTRAL = 18, 00174 U_CHAR_DIRECTION_COUNT 00175 }; 00176 00177 typedef enum UCharDirection UCharDirection; 00185 enum UCharScript { 00186 /* Script names */ 00188 U_BASIC_LATIN, 00190 U_LATIN_1_SUPPLEMENT, 00192 U_LATIN_EXTENDED_A, 00194 U_LATIN_EXTENDED_B, 00196 U_IPA_EXTENSIONS, 00198 U_SPACING_MODIFIER_LETTERS, 00200 U_COMBINING_DIACRITICAL_MARKS, 00202 U_GREEK, 00204 U_CYRILLIC, 00206 U_ARMENIAN, 00208 U_HEBREW, 00210 U_ARABIC, 00212 U_SYRIAC, 00214 U_THAANA, 00216 U_DEVANAGARI, 00218 U_BENGALI, 00220 U_GURMUKHI, 00222 U_GUJARATI, 00224 U_ORIYA, 00226 U_TAMIL, 00228 U_TELUGU, 00230 U_KANNADA, 00232 U_MALAYALAM, 00234 U_SINHALA, 00236 U_THAI, 00238 U_LAO, 00240 U_TIBETAN, 00242 U_MYANMAR, 00244 U_GEORGIAN, 00246 U_HANGUL_JAMO, 00248 U_ETHIOPIC, 00250 U_CHEROKEE, 00252 U_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS, 00254 U_OGHAM, 00256 U_RUNIC, 00258 U_KHMER, 00260 U_MONGOLIAN, 00262 U_LATIN_EXTENDED_ADDITIONAL, 00264 U_GREEK_EXTENDED, 00266 U_GENERAL_PUNCTUATION, 00268 U_SUPERSCRIPTS_AND_SUBSCRIPTS, 00270 U_CURRENCY_SYMBOLS, 00272 U_COMBINING_MARKS_FOR_SYMBOLS, 00274 U_LETTERLIKE_SYMBOLS, 00276 U_NUMBER_FORMS, 00278 U_ARROWS, 00280 U_MATHEMATICAL_OPERATORS, 00282 U_MISCELLANEOUS_TECHNICAL, 00284 U_CONTROL_PICTURES, 00286 U_OPTICAL_CHARACTER_RECOGNITION, 00288 U_ENCLOSED_ALPHANUMERICS, 00290 U_BOX_DRAWING, 00292 U_BLOCK_ELEMENTS, 00294 U_GEOMETRIC_SHAPES, 00296 U_MISCELLANEOUS_SYMBOLS, 00298 U_DINGBATS, 00300 U_BRAILLE_PATTERNS, 00302 U_CJK_RADICALS_SUPPLEMENT, 00304 U_KANGXI_RADICALS, 00306 U_IDEOGRAPHIC_DESCRIPTION_CHARACTERS, 00308 U_CJK_SYMBOLS_AND_PUNCTUATION, 00310 U_HIRAGANA, 00312 U_KATAKANA, 00314 U_BOPOMOFO, 00316 U_HANGUL_COMPATIBILITY_JAMO, 00318 U_KANBUN, 00320 U_BOPOMOFO_EXTENDED, 00322 U_ENCLOSED_CJK_LETTERS_AND_MONTHS, 00324 U_CJK_COMPATIBILITY, 00326 U_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A, 00328 U_CJK_UNIFIED_IDEOGRAPHS, 00330 U_YI_SYLLABLES, 00332 U_YI_RADICALS, 00334 U_HANGUL_SYLLABLES, 00336 U_HIGH_SURROGATES, 00338 U_HIGH_PRIVATE_USE_SURROGATES, 00340 U_LOW_SURROGATES, 00342 U_PRIVATE_USE_AREA, 00344 U_CJK_COMPATIBILITY_IDEOGRAPHS, 00346 U_ALPHABETIC_PRESENTATION_FORMS, 00348 U_ARABIC_PRESENTATION_FORMS_A, 00350 U_COMBINING_HALF_MARKS, 00352 U_CJK_COMPATIBILITY_FORMS, 00354 U_SMALL_FORM_VARIANTS, 00356 U_ARABIC_PRESENTATION_FORMS_B, 00358 U_SPECIALS, 00360 U_HALFWIDTH_AND_FULLWIDTH_FORMS, 00362 U_CHAR_SCRIPT_COUNT, 00364 U_NO_SCRIPT=U_CHAR_SCRIPT_COUNT 00365 }; 00366 typedef enum UCharScript UCharScript; 00367 00372 enum UCellWidth 00373 { 00375 U_ZERO_WIDTH = 0, 00377 U_HALF_WIDTH = 1, 00379 U_FULL_WIDTH = 2, 00381 U_NEUTRAL_WIDTH = 3, 00383 U_CELL_WIDTH_COUNT 00384 }; 00385 00386 typedef enum UCellWidth UCellWidth; 00387 00397 enum UCharNameChoice { 00398 U_UNICODE_CHAR_NAME, 00399 U_UNICODE_10_CHAR_NAME, 00400 U_CHAR_NAME_CHOICE_COUNT 00401 }; 00402 00403 typedef enum UCharNameChoice UCharNameChoice; 00404 00421 U_CAPI UBool U_EXPORT2 00422 u_islower(UChar32 c); 00423 00435 U_CAPI UBool U_EXPORT2 00436 u_isupper(UChar32 c); 00437 00449 U_CAPI UBool U_EXPORT2 00450 u_istitle(UChar32 c); 00451 00460 U_CAPI UBool U_EXPORT2 00461 u_isdigit(UChar32 c); 00462 00471 U_CAPI UBool U_EXPORT2 00472 u_isalnum(UChar32 c); 00473 00489 U_CAPI UBool U_EXPORT2 00490 u_isdefined(UChar32 c); 00491 00503 U_CAPI UBool U_EXPORT2 00504 u_isalpha(UChar32 c); 00505 00513 U_CAPI UBool U_EXPORT2 00514 u_isspace(UChar32 c); 00515 00544 U_CAPI UBool U_EXPORT2 00545 u_isWhitespace(UChar32 c); 00546 00556 U_CAPI UBool U_EXPORT2 00557 u_iscntrl(UChar32 c); 00558 00559 00570 U_CAPI UBool U_EXPORT2 00571 u_isprint(UChar32 c); 00572 00584 U_CAPI UBool U_EXPORT2 00585 u_isbase(UChar32 c); 00586 00596 U_CAPI UCharDirection U_EXPORT2 00597 u_charDirection(UChar32 c); 00598 00608 U_CAPI UBool U_EXPORT2 00609 u_isMirrored(UChar32 c); 00610 00626 U_CAPI UChar32 U_EXPORT2 00627 u_charMirror(UChar32 c); 00628 00680 U_CAPI uint16_t U_EXPORT2 00681 u_charCellWidth(UChar32 c); 00682 00691 U_CAPI int8_t U_EXPORT2 00692 u_charType(UChar32 c); 00693 00702 U_CAPI int32_t U_EXPORT2 00703 u_charDigitValue(UChar32 c); 00704 00711 U_CAPI UCharScript U_EXPORT2 00712 u_charScript(UChar32 ch); 00713 00734 U_CAPI UTextOffset U_EXPORT2 00735 u_charName(uint32_t code, UCharNameChoice nameChoice, 00736 char *buffer, UTextOffset bufferLength, 00737 UErrorCode *pErrorCode); 00738 00755 U_CAPI UBool U_EXPORT2 00756 u_isIDStart(UChar32 c); 00781 U_CAPI UBool U_EXPORT2 00782 u_isIDPart(UChar32 c); 00807 U_CAPI UBool U_EXPORT2 00808 u_isIDIgnorable(UChar32 c); 00829 U_CAPI UBool U_EXPORT2 00830 u_isJavaIDStart(UChar32 c); 00860 U_CAPI UBool U_EXPORT2 00861 u_isJavaIDPart(UChar32 c); 00862 00885 U_CAPI UChar32 U_EXPORT2 00886 u_tolower(UChar32 c); 00887 00903 U_CAPI UChar32 U_EXPORT2 00904 u_toupper(UChar32 c); 00919 U_CAPI UChar32 U_EXPORT2 00920 u_totitle(UChar32 c); 00921 00929 U_CAPI void U_EXPORT2 00930 u_getUnicodeVersion(UVersionInfo info); 00931 00932 #endif /*_UCHAR*/ 00933 /*eof*/