Main Page   Class Hierarchy   Alphabetical List   Compound List   File List   Compound Members   File Members   Search  

uchar.h File Reference

C API: Unicode Char. More...

#include "unicode/utypes.h"

Go to the source code of this file.

Defines

#define U_UNICODE_VERSION   "3.1.1"
#define UCHAR_MIN_VALUE   0
 The lowest Unicode code point value. More...

#define UCHAR_MAX_VALUE   0x10ffff
 The highest Unicode code point value (scalar value) according to The Unicode Standard. More...

#define U_FOLD_CASE_DEFAULT   0
 Option value for case folding: use all mappings defined in CaseFolding.txt. More...

#define U_FOLD_CASE_EXCLUDE_SPECIAL_I   1
 Option value for case folding: exclude the mappings for dotted I and dotless i marked with 'I' in CaseFolding.txt. More...

#define u_charScript   ublock_getCode

Typedefs

typedef enum UCharCategory UCharCategory
typedef enum UCharDirection UCharDirection
typedef enum UBlockCode UBlockCode
typedef enum UCellWidth UCellWidth
typedef enum UCharNameChoice UCharNameChoice
typedef UBlockCode UCharScript

Enumerations

enum  UCharCategory {
  U_UNASSIGNED = 0, U_GENERAL_OTHER_TYPES = 0, U_UPPERCASE_LETTER = 1, U_LOWERCASE_LETTER = 2,
  U_TITLECASE_LETTER = 3, U_MODIFIER_LETTER = 4, U_OTHER_LETTER = 5, U_NON_SPACING_MARK = 6,
  U_ENCLOSING_MARK = 7, U_COMBINING_SPACING_MARK = 8, U_DECIMAL_DIGIT_NUMBER = 9, U_LETTER_NUMBER = 10,
  U_OTHER_NUMBER = 11, U_SPACE_SEPARATOR = 12, U_LINE_SEPARATOR = 13, U_PARAGRAPH_SEPARATOR = 14,
  U_CONTROL_CHAR = 15, U_FORMAT_CHAR = 16, U_PRIVATE_USE_CHAR = 17, U_SURROGATE = 18,
  U_DASH_PUNCTUATION = 19, U_START_PUNCTUATION = 20, U_END_PUNCTUATION = 21, U_CONNECTOR_PUNCTUATION = 22,
  U_OTHER_PUNCTUATION = 23, U_MATH_SYMBOL = 24, U_CURRENCY_SYMBOL = 25, U_MODIFIER_SYMBOL = 26,
  U_OTHER_SYMBOL = 27, U_INITIAL_PUNCTUATION = 28, U_FINAL_PUNCTUATION = 29, U_CHAR_CATEGORY_COUNT
}
 Data for enumerated Unicode general category types. More...

enum  UCharDirection {
  U_LEFT_TO_RIGHT = 0, U_RIGHT_TO_LEFT = 1, U_EUROPEAN_NUMBER = 2, U_EUROPEAN_NUMBER_SEPARATOR = 3,
  U_EUROPEAN_NUMBER_TERMINATOR = 4, U_ARABIC_NUMBER = 5, U_COMMON_NUMBER_SEPARATOR = 6, U_BLOCK_SEPARATOR = 7,
  U_SEGMENT_SEPARATOR = 8, U_WHITE_SPACE_NEUTRAL = 9, U_OTHER_NEUTRAL = 10, U_LEFT_TO_RIGHT_EMBEDDING = 11,
  U_LEFT_TO_RIGHT_OVERRIDE = 12, U_RIGHT_TO_LEFT_ARABIC = 13, U_RIGHT_TO_LEFT_EMBEDDING = 14, U_RIGHT_TO_LEFT_OVERRIDE = 15,
  U_POP_DIRECTIONAL_FORMAT = 16, U_DIR_NON_SPACING_MARK = 17, U_BOUNDARY_NEUTRAL = 18, U_CHAR_DIRECTION_COUNT
}
 This specifies the language directional property of a character set. More...

enum  UBlockCode {
  UBLOCK_BASIC_LATIN = 1, U_BASIC_LATIN = 1, UBLOCK_LATIN_1_SUPPLEMENT = 2, U_LATIN_1_SUPPLEMENT = 2,
  UBLOCK_LATIN_EXTENDED_A = 3, U_LATIN_EXTENDED_A = 3, UBLOCK_LATIN_EXTENDED_B = 4, U_LATIN_EXTENDED_B = 4,
  UBLOCK_IPA_EXTENSIONS = 5, U_IPA_EXTENSIONS = 5, UBLOCK_SPACING_MODIFIER_LETTERS = 6, U_SPACING_MODIFIER_LETTERS = 6,
  UBLOCK_COMBINING_DIACRITICAL_MARKS = 7, U_COMBINING_DIACRITICAL_MARKS = 7, UBLOCK_GREEK = 8, U_GREEK = 8,
  UBLOCK_CYRILLIC = 9, U_CYRILLIC = 9, UBLOCK_ARMENIAN = 10, U_ARMENIAN = 10,
  UBLOCK_HEBREW = 11, U_HEBREW = 11, UBLOCK_ARABIC = 12, U_ARABIC = 12,
  UBLOCK_SYRIAC = 13, U_SYRIAC = 13, UBLOCK_THAANA = 14, U_THAANA = 14,
  UBLOCK_DEVANAGARI = 15, U_DEVANAGARI = 15, UBLOCK_BENGALI = 16, U_BENGALI = 16,
  UBLOCK_GURMUKHI = 17, U_GURMUKHI = 17, UBLOCK_GUJARATI = 18, U_GUJARATI = 18,
  UBLOCK_ORIYA = 19, U_ORIYA = 19, UBLOCK_TAMIL = 20, U_TAMIL = 20,
  UBLOCK_TELUGU = 21, U_TELUGU = 21, UBLOCK_KANNADA = 22, U_KANNADA = 22,
  UBLOCK_MALAYALAM = 23, U_MALAYALAM = 23, UBLOCK_SINHALA = 24, U_SINHALA = 24,
  UBLOCK_THAI = 25, U_THAI = 25, UBLOCK_LAO = 26, U_LAO = 26,
  UBLOCK_TIBETAN = 27, U_TIBETAN = 27, UBLOCK_MYANMAR = 28, U_MYANMAR = 28,
  UBLOCK_GEORGIAN = 29, U_GEORGIAN = 29, UBLOCK_HANGUL_JAMO = 30, U_HANGUL_JAMO = 30,
  UBLOCK_ETHIOPIC = 31, U_ETHIOPIC = 31, UBLOCK_CHEROKEE = 32, U_CHEROKEE = 32,
  UBLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS = 33, U_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS = 33, UBLOCK_OGHAM = 34, U_OGHAM = 34,
  UBLOCK_RUNIC = 35, U_RUNIC = 35, UBLOCK_KHMER = 36, U_KHMER = 36,
  UBLOCK_MONGOLIAN = 37, U_MONGOLIAN = 37, UBLOCK_LATIN_EXTENDED_ADDITIONAL = 38, U_LATIN_EXTENDED_ADDITIONAL = 38,
  UBLOCK_GREEK_EXTENDED = 39, U_GREEK_EXTENDED = 39, UBLOCK_GENERAL_PUNCTUATION = 40, U_GENERAL_PUNCTUATION = 40,
  UBLOCK_SUPERSCRIPTS_AND_SUBSCRIPTS = 41, U_SUPERSCRIPTS_AND_SUBSCRIPTS = 41, UBLOCK_CURRENCY_SYMBOLS = 42, U_CURRENCY_SYMBOLS = 42,
  UBLOCK_COMBINING_MARKS_FOR_SYMBOLS = 43, U_COMBINING_MARKS_FOR_SYMBOLS = 43, UBLOCK_LETTERLIKE_SYMBOLS = 44, U_LETTERLIKE_SYMBOLS = 44,
  UBLOCK_NUMBER_FORMS = 45, U_NUMBER_FORMS = 45, UBLOCK_ARROWS = 46, U_ARROWS = 46,
  UBLOCK_MATHEMATICAL_OPERATORS = 47, U_MATHEMATICAL_OPERATORS = 47, UBLOCK_MISCELLANEOUS_TECHNICAL = 48, U_MISCELLANEOUS_TECHNICAL = 48,
  UBLOCK_CONTROL_PICTURES = 49, U_CONTROL_PICTURES = 49, UBLOCK_OPTICAL_CHARACTER_RECOGNITION = 50, U_OPTICAL_CHARACTER_RECOGNITION = 50,
  UBLOCK_ENCLOSED_ALPHANUMERICS = 51, U_ENCLOSED_ALPHANUMERICS = 51, UBLOCK_BOX_DRAWING = 52, U_BOX_DRAWING = 52,
  UBLOCK_BLOCK_ELEMENTS = 53, U_BLOCK_ELEMENTS = 53, UBLOCK_GEOMETRIC_SHAPES = 54, U_GEOMETRIC_SHAPES = 54,
  UBLOCK_MISCELLANEOUS_SYMBOLS = 55, U_MISCELLANEOUS_SYMBOLS = 55, UBLOCK_DINGBATS = 56, U_DINGBATS = 56,
  UBLOCK_BRAILLE_PATTERNS = 57, U_BRAILLE_PATTERNS = 57, UBLOCK_CJK_RADICALS_SUPPLEMENT = 58, U_CJK_RADICALS_SUPPLEMENT = 58,
  UBLOCK_KANGXI_RADICALS = 59, U_KANGXI_RADICALS = 59, UBLOCK_IDEOGRAPHIC_DESCRIPTION_CHARACTERS = 60, U_IDEOGRAPHIC_DESCRIPTION_CHARACTERS = 60,
  UBLOCK_CJK_SYMBOLS_AND_PUNCTUATION = 61, U_CJK_SYMBOLS_AND_PUNCTUATION = 61, UBLOCK_HIRAGANA = 62, U_HIRAGANA = 62,
  UBLOCK_KATAKANA = 63, U_KATAKANA = 63, UBLOCK_BOPOMOFO = 64, U_BOPOMOFO = 64,
  UBLOCK_HANGUL_COMPATIBILITY_JAMO = 65, U_HANGUL_COMPATIBILITY_JAMO = 65, UBLOCK_KANBUN = 66, U_KANBUN = 66,
  UBLOCK_BOPOMOFO_EXTENDED = 67, U_BOPOMOFO_EXTENDED = 67, UBLOCK_ENCLOSED_CJK_LETTERS_AND_MONTHS = 68, U_ENCLOSED_CJK_LETTERS_AND_MONTHS = 68,
  UBLOCK_CJK_COMPATIBILITY = 69, U_CJK_COMPATIBILITY = 69, UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A = 70, U_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A = 70,
  UBLOCK_CJK_UNIFIED_IDEOGRAPHS = 71, U_CJK_UNIFIED_IDEOGRAPHS = 71, UBLOCK_YI_SYLLABLES = 72, U_YI_SYLLABLES = 72,
  UBLOCK_YI_RADICALS = 73, U_YI_RADICALS = 73, UBLOCK_HANGUL_SYLLABLES = 74, U_HANGUL_SYLLABLES = 74,
  UBLOCK_HIGH_SURROGATES = 75, U_HIGH_SURROGATES = 75, UBLOCK_HIGH_PRIVATE_USE_SURROGATES = 76, U_HIGH_PRIVATE_USE_SURROGATES = 76,
  UBLOCK_LOW_SURROGATES = 77, U_LOW_SURROGATES = 77, UBLOCK_PRIVATE_USE_AREA = 78, U_PRIVATE_USE_AREA = 78,
  UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS = 79, U_CJK_COMPATIBILITY_IDEOGRAPHS = 79, UBLOCK_ALPHABETIC_PRESENTATION_FORMS = 80, U_ALPHABETIC_PRESENTATION_FORMS = 80,
  UBLOCK_ARABIC_PRESENTATION_FORMS_A = 81, U_ARABIC_PRESENTATION_FORMS_A = 81, UBLOCK_COMBINING_HALF_MARKS = 82, U_COMBINING_HALF_MARKS = 82,
  UBLOCK_CJK_COMPATIBILITY_FORMS = 83, U_CJK_COMPATIBILITY_FORMS = 83, UBLOCK_SMALL_FORM_VARIANTS = 84, U_SMALL_FORM_VARIANTS = 84,
  UBLOCK_ARABIC_PRESENTATION_FORMS_B = 85, U_ARABIC_PRESENTATION_FORMS_B = 85, UBLOCK_SPECIALS = 86, U_SPECIALS = 86,
  UBLOCK_HALFWIDTH_AND_FULLWIDTH_FORMS = 87, U_HALFWIDTH_AND_FULLWIDTH_FORMS = 87, UBLOCK_OLD_ITALIC = 88, UBLOCK_GOTHIC = 89,
  UBLOCK_DESERET = 90, UBLOCK_BYZANTINE_MUSICAL_SYMBOLS = 91, UBLOCK_MUSICAL_SYMBOLS = 92, UBLOCK_MATHEMATICAL_ALPHANUMERIC_SYMBOLS = 93,
  UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B = 94, UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT = 95, UBLOCK_TAGS = 96, UBLOCK_PRIVATE_USE = 97,
  UBLOCK_COUNT = 98, U_SCRIPT_COUNT = 98, UBLOCK_INVALID_CODE = -1, U_CHAR_SCRIPT_COUNT = UBLOCK_COUNT,
  U_NO_SCRIPT = UBLOCK_COUNT
}
 Constants for Unicode blocks, generated from the Unicode Data file Blocks.txt. These are the same values as Unicode::EUnicodeScript. More...

enum  UCellWidth {
  U_ZERO_WIDTH = 0, U_HALF_WIDTH = 1, U_FULL_WIDTH = 2, U_NEUTRAL_WIDTH = 3,
  U_CELL_WIDTH_COUNT
}
 Values returned by the u_charCellWidth() function. More...

enum  UCharNameChoice { U_UNICODE_CHAR_NAME, U_UNICODE_10_CHAR_NAME, U_CHAR_NAME_CHOICE_COUNT }
 Selector constants for u_charName(). More...


Functions

U_CAPI UBool U_EXPORT2 u_islower (UChar32 c)
 Determines whether the specified UChar is a lowercase character according to UnicodeData.txt. More...

U_CAPI UBool U_EXPORT2 u_isupper (UChar32 c)
 Determines whether the specified character is an uppercase character according to UnicodeData.txt. More...

U_CAPI UBool U_EXPORT2 u_istitle (UChar32 c)
 Determines whether the specified character is a titlecase character according to UnicodeData.txt. More...

U_CAPI UBool U_EXPORT2 u_isdigit (UChar32 c)
 Determines whether the specified character is a digit according to UnicodeData.txt. More...

U_CAPI UBool U_EXPORT2 u_isalnum (UChar32 c)
 Determines whether the specified character is an alphanumeric character (letter or digit) according to UnicodeData.txt. More...

U_CAPI UBool U_EXPORT2 u_isdefined (UChar32 c)
 Determines whether the specified numeric value is actually a defined character according to UnicodeData.txt. More...

U_CAPI UBool U_EXPORT2 u_isalpha (UChar32 c)
 Determines whether the specified character is a letter according to UnicodeData.txt. More...

U_CAPI UBool U_EXPORT2 u_isspace (UChar32 c)
 Determines if the specified character is a space character or not. More...

U_CAPI UBool U_EXPORT2 u_isWhitespace (UChar32 c)
 Determines if the specified character is white space according to ICU. More...

U_CAPI UBool U_EXPORT2 u_iscntrl (UChar32 c)
 Determines whether the specified character is a control character or not. More...

U_CAPI UBool U_EXPORT2 u_isprint (UChar32 c)
 Determines whether the specified character is a printable character according to UnicodeData.txt. More...

U_CAPI UBool U_EXPORT2 u_isbase (UChar32 c)
 Determines whether the specified character is of the base form according to UnicodeData.txt. More...

U_CAPI UCharDirection U_EXPORT2 u_charDirection (UChar32 c)
 Returns the linguistic direction property of a character. More...

U_CAPI UBool U_EXPORT2 u_isMirrored (UChar32 c)
 Determines whether the character has the "mirrored" property. More...

U_CAPI UChar32 U_EXPORT2 u_charMirror (UChar32 c)
 Maps the specified character to a "mirror-image" character. More...

U_CAPI uint16_t U_EXPORT2 u_charCellWidth (UChar32 c)
 Returns a value indicating the display-cell width of the character when used in Asian text, according to the Unicode standard. More...

U_CAPI int8_t U_EXPORT2 u_charType (UChar32 c)
 Returns a value indicating a character category according to UnicodeData.txt. More...

U_CAPI uint8_t U_EXPORT2 u_getCombiningClass (UChar32 c)
 Returns the combining class of the code point as specified in UnicodeData.txt. More...

U_CAPI int32_t U_EXPORT2 u_charDigitValue (UChar32 c)
 Retrieves the decimal numeric value of a digit character. More...

U_CAPI UBlockCode U_EXPORT2 ublock_getCode (UChar32 ch)
 Returns the Unicode allocation block that contains the character. More...

U_CAPI UTextOffset U_EXPORT2 u_charName (UChar32 code, UCharNameChoice nameChoice, char *buffer, UTextOffset bufferLength, UErrorCode *pErrorCode)
 Retrieve the name of a Unicode character. More...

U_CAPI UChar32 U_EXPORT2 u_charFromName (UCharNameChoice nameChoice, const char *name, UErrorCode *pErrorCode)
 Find a Unicode character by its name and return its code point value. More...

U_CDECL_BEGIN typedef UBool UEnumCharNamesFn (void *context, UChar32 code, UCharNameChoice nameChoice, const char *name, UTextOffset length)
 Type of a callback function for u_enumCharNames() that gets called for each Unicode character with the code point value and the character name. More...

U_CDECL_END U_CAPI void U_EXPORT2 u_enumCharNames (UChar32 start, UChar32 limit, UEnumCharNamesFn *fn, void *context, UCharNameChoice nameChoice, UErrorCode *pErrorCode)
 Enumerate all assigned Unicode characters between the start and limit code points (start inclusive, limit exclusive) and call a function for each, passing the code point value and the character name. More...

U_CAPI UBool U_EXPORT2 u_isIDStart (UChar32 c)
 A convenience method for determining if a Unicode character is allowed to start in a Unicode identifier. More...

U_CAPI UBool U_EXPORT2 u_isIDPart (UChar32 c)
 A convenience method for determining if a Unicode character may be part of a Unicode identifier other than the starting character. More...

U_CAPI UBool U_EXPORT2 u_isIDIgnorable (UChar32 c)
 A convenience method for determining if a Unicode character should be regarded as an ignorable character in a Unicode identifier. More...

U_CAPI UBool U_EXPORT2 u_isJavaIDStart (UChar32 c)
 A convenience method for determining if a Unicode character is allowed as the first character in a Java identifier. More...

U_CAPI UBool U_EXPORT2 u_isJavaIDPart (UChar32 c)
 A convenience method for determining if a Unicode character may be part of a Java identifier other than the starting character. More...

U_CAPI UChar32 U_EXPORT2 u_tolower (UChar32 c)
 The given character is mapped to its lowercase equivalent according to UnicodeData.txt; if the character has no lowercase equivalent, the character itself is returned. More...

U_CAPI UChar32 U_EXPORT2 u_toupper (UChar32 c)
 The given character is mapped to its uppercase equivalent according to UnicodeData.txt; if the character has no uppercase equivalent, the character itself is returned. More...

U_CAPI UChar32 U_EXPORT2 u_totitle (UChar32 c)
 The given character is mapped to its titlecase equivalent according to UnicodeData.txt. More...

U_CAPI UChar32 U_EXPORT2 u_foldCase (UChar32 c, uint32_t options)
 The given character is mapped to its case folding equivalent according to UnicodeData.txt and CaseFolding.txt; if the character has no case folding equivalent, the character itself is returned. More...

U_CAPI int32_t U_EXPORT2 u_digit (UChar32 ch, int8_t radix)
 Returns the numeric value of the character ch in the specified radix. More...

U_CAPI UChar32 U_EXPORT2 u_forDigit (int32_t digit, int8_t radix)
 Determines the character representation for a specific digit in the specified radix. More...

U_CAPI void U_EXPORT2 u_getUnicodeVersion (UVersionInfo info)
 Gets the Unicode version information. More...


Detailed Description

C API: Unicode Char.

Unicode C API

The Unicode C API allows you to query the properties associated with individual Unicode character values.

The Unicode character information, provided implicitly by the Unicode character encoding standard, includes information about the script (for example, symbols or control characters) to which the character belongs, as well as semantic information such as whether a character is a digit or uppercase, lowercase, or uncased.

Definition in file uchar.h.


Define Documentation

#define UCHAR_MAX_VALUE   0x10ffff
 

The highest Unicode code point value (scalar value) according to The Unicode Standard.

This is a 21-bit value (20.1 bits, rounded up). For a single character, UChar32 is a simple type that can hold any code point value.

Stable:

Definition at line 61 of file uchar.h.

#define UCHAR_MIN_VALUE   0
 

The lowest Unicode code point value.

Code points are non-negative.

Stable:

Definition at line 53 of file uchar.h.

#define U_FOLD_CASE_DEFAULT   0
 

Option value for case folding: use all mappings defined in CaseFolding.txt.

Draft:
This API has been introduced in ICU 1.8. It is still in draft state and may be modified in a future release.

Definition at line 1327 of file uchar.h.

#define U_FOLD_CASE_EXCLUDE_SPECIAL_I   1
 

Option value for case folding: exclude the mappings for dotted I and dotless i marked with 'I' in CaseFolding.txt.

Draft:
This API has been introduced in ICU 1.8. It is still in draft state and may be modified in a future release.

Definition at line 1329 of file uchar.h.

#define U_UNICODE_VERSION   "3.1.1"
 

Definition at line 30 of file uchar.h.

#define u_charScript   ublock_getCode
 

Deprecated:
Use ublock_getCode instead. Remove after Aug, 2002

Definition at line 1428 of file uchar.h.

Referenced by Unicode::getScript().


Typedef Documentation

typedef enum UBlockCode UBlockCode
 

Draft:
This API has been introduced in ICU 2.0. It is still in draft state and may be modified in a future release.

Definition at line 664 of file uchar.h.

typedef enum UCellWidth UCellWidth
 

Stable:

Definition at line 685 of file uchar.h.

typedef enum UCharCategory UCharCategory
 

Definition at line 136 of file uchar.h.

typedef enum UCharDirection UCharDirection
 

Definition at line 185 of file uchar.h.

typedef enum UCharNameChoice UCharNameChoice
 

Stable:

Definition at line 704 of file uchar.h.

typedef UBlockCode UCharScript
 

Deprecated:
Use the enum UBlockCode instead. Remove after Aug, 2002

Definition at line 1430 of file uchar.h.


Enumeration Type Documentation

enum UBlockCode
 

Constants for Unicode blocks, generated from the Unicode Data file Blocks.txt. These are the same values as Unicode::EUnicodeScript.

Draft:
This API has been introduced in ICU 2.0. It is still in draft state and may be modified in a future release.
Enumeration values:
UBLOCK_BASIC_LATIN 
Draft:
This API has been introduced in ICU 2.0. It is still in draft state and may be modified in a future release.
U_BASIC_LATIN 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_LATIN_1_SUPPLEMENT 
Draft:
This API has been introduced in ICU 2.0. It is still in draft state and may be modified in a future release.
U_LATIN_1_SUPPLEMENT 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_LATIN_EXTENDED_A 
Draft:
This API has been introduced in ICU 2.0. It is still in draft state and may be modified in a future release.
U_LATIN_EXTENDED_A 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_LATIN_EXTENDED_B 
Draft:
This API has been introduced in ICU 2.0. It is still in draft state and may be modified in a future release.
U_LATIN_EXTENDED_B 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_IPA_EXTENSIONS 
Draft:
This API has been introduced in ICU 2.0. It is still in draft state and may be modified in a future release.
U_IPA_EXTENSIONS 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_SPACING_MODIFIER_LETTERS 
Draft:
This API has been introduced in ICU 2.0. It is still in draft state and may be modified in a future release.
U_SPACING_MODIFIER_LETTERS 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_COMBINING_DIACRITICAL_MARKS 
Draft:
This API has been introduced in ICU 2.0. It is still in draft state and may be modified in a future release.
U_COMBINING_DIACRITICAL_MARKS 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_GREEK 
Draft:
This API has been introduced in ICU 2.0. It is still in draft state and may be modified in a future release.
U_GREEK 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_CYRILLIC 
Draft:
This API has been introduced in ICU 2.0. It is still in draft state and may be modified in a future release.
U_CYRILLIC 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_ARMENIAN 
Draft:
This API has been introduced in ICU 2.0. It is still in draft state and may be modified in a future release.
U_ARMENIAN 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_HEBREW 
Draft:
This API has been introduced in ICU 2.0. It is still in draft state and may be modified in a future release.
U_HEBREW 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_ARABIC 
Draft:
This API has been introduced in ICU 2.0. It is still in draft state and may be modified in a future release.
U_ARABIC 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_SYRIAC 
Draft:
This API has been introduced in ICU 2.0. It is still in draft state and may be modified in a future release.
U_SYRIAC 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_THAANA 
Draft:
This API has been introduced in ICU 2.0. It is still in draft state and may be modified in a future release.
U_THAANA 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_DEVANAGARI 
Draft:
This API has been introduced in ICU 2.0. It is still in draft state and may be modified in a future release.
U_DEVANAGARI 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_BENGALI 
Draft:
This API has been introduced in ICU 2.0. It is still in draft state and may be modified in a future release.
U_BENGALI 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_GURMUKHI 
Draft:
This API has been introduced in ICU 2.0. It is still in draft state and may be modified in a future release.
U_GURMUKHI 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_GUJARATI 
Draft:
This API has been introduced in ICU 2.0. It is still in draft state and may be modified in a future release.
U_GUJARATI 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_ORIYA 
Draft:
This API has been introduced in ICU 2.0. It is still in draft state and may be modified in a future release.
U_ORIYA 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_TAMIL 
Draft:
This API has been introduced in ICU 2.0. It is still in draft state and may be modified in a future release.
U_TAMIL 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_TELUGU 
Draft:
This API has been introduced in ICU 2.0. It is still in draft state and may be modified in a future release.
U_TELUGU 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_KANNADA 
Draft:
This API has been introduced in ICU 2.0. It is still in draft state and may be modified in a future release.
U_KANNADA 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_MALAYALAM 
Draft:
This API has been introduced in ICU 2.0. It is still in draft state and may be modified in a future release.
U_MALAYALAM 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_SINHALA 
Draft:
This API has been introduced in ICU 2.0. It is still in draft state and may be modified in a future release.
U_SINHALA 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_THAI 
Draft:
This API has been introduced in ICU 2.0. It is still in draft state and may be modified in a future release.
U_THAI 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_LAO 
Draft:
This API has been introduced in ICU 2.0. It is still in draft state and may be modified in a future release.
U_LAO 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_TIBETAN 
Draft:
This API has been introduced in ICU 2.0. It is still in draft state and may be modified in a future release.
U_TIBETAN 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_MYANMAR 
Draft:
This API has been introduced in ICU 2.0. It is still in draft state and may be modified in a future release.
U_MYANMAR 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_GEORGIAN 
Draft:
This API has been introduced in ICU 2.0. It is still in draft state and may be modified in a future release.
U_GEORGIAN 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_HANGUL_JAMO 
Draft:
This API has been introduced in ICU 2.0. It is still in draft state and may be modified in a future release.
U_HANGUL_JAMO 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_ETHIOPIC 
Draft:
This API has been introduced in ICU 2.0. It is still in draft state and may be modified in a future release.
U_ETHIOPIC 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_CHEROKEE 
Draft:
This API has been introduced in ICU 2.0. It is still in draft state and may be modified in a future release.
U_CHEROKEE 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS 
Draft:
This API has been introduced in ICU 2.0. It is still in draft state and may be modified in a future release.
U_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_OGHAM 
Draft:
This API has been introduced in ICU 2.0. It is still in draft state and may be modified in a future release.
U_OGHAM 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_RUNIC 
Draft:
This API has been introduced in ICU 2.0. It is still in draft state and may be modified in a future release.
U_RUNIC 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_KHMER 
Draft:
This API has been introduced in ICU 2.0. It is still in draft state and may be modified in a future release.
U_KHMER 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_MONGOLIAN 
Draft:
This API has been introduced in ICU 2.0. It is still in draft state and may be modified in a future release.
U_MONGOLIAN 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_LATIN_EXTENDED_ADDITIONAL 
Draft:
This API has been introduced in ICU 2.0. It is still in draft state and may be modified in a future release.
U_LATIN_EXTENDED_ADDITIONAL 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_GREEK_EXTENDED 
Draft:
This API has been introduced in ICU 2.0. It is still in draft state and may be modified in a future release.
U_GREEK_EXTENDED 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_GENERAL_PUNCTUATION 
Draft:
This API has been introduced in ICU 2.0. It is still in draft state and may be modified in a future release.
U_GENERAL_PUNCTUATION 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_SUPERSCRIPTS_AND_SUBSCRIPTS 
Draft:
This API has been introduced in ICU 2.0. It is still in draft state and may be modified in a future release.
U_SUPERSCRIPTS_AND_SUBSCRIPTS 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_CURRENCY_SYMBOLS 
Draft:
This API has been introduced in ICU 2.0. It is still in draft state and may be modified in a future release.
U_CURRENCY_SYMBOLS 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_COMBINING_MARKS_FOR_SYMBOLS 
Draft:
This API has been introduced in ICU 2.0. It is still in draft state and may be modified in a future release.
U_COMBINING_MARKS_FOR_SYMBOLS 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_LETTERLIKE_SYMBOLS 
Draft:
This API has been introduced in ICU 2.0. It is still in draft state and may be modified in a future release.
U_LETTERLIKE_SYMBOLS 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_NUMBER_FORMS 
Draft:
This API has been introduced in ICU 2.0. It is still in draft state and may be modified in a future release.
U_NUMBER_FORMS 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_ARROWS 
Draft:
This API has been introduced in ICU 2.0. It is still in draft state and may be modified in a future release.
U_ARROWS 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_MATHEMATICAL_OPERATORS 
Draft:
This API has been introduced in ICU 2.0. It is still in draft state and may be modified in a future release.
U_MATHEMATICAL_OPERATORS 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_MISCELLANEOUS_TECHNICAL 
Draft:
This API has been introduced in ICU 2.0. It is still in draft state and may be modified in a future release.
U_MISCELLANEOUS_TECHNICAL 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_CONTROL_PICTURES 
Draft:
This API has been introduced in ICU 2.0. It is still in draft state and may be modified in a future release.
U_CONTROL_PICTURES 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_OPTICAL_CHARACTER_RECOGNITION 
Draft:
This API has been introduced in ICU 2.0. It is still in draft state and may be modified in a future release.
U_OPTICAL_CHARACTER_RECOGNITION 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_ENCLOSED_ALPHANUMERICS 
Draft:
This API has been introduced in ICU 2.0. It is still in draft state and may be modified in a future release.
U_ENCLOSED_ALPHANUMERICS 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_BOX_DRAWING 
Draft:
This API has been introduced in ICU 2.0. It is still in draft state and may be modified in a future release.
U_BOX_DRAWING 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_BLOCK_ELEMENTS 
Draft:
This API has been introduced in ICU 2.0. It is still in draft state and may be modified in a future release.
U_BLOCK_ELEMENTS 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_GEOMETRIC_SHAPES 
Draft:
This API has been introduced in ICU 2.0. It is still in draft state and may be modified in a future release.
U_GEOMETRIC_SHAPES 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_MISCELLANEOUS_SYMBOLS 
Draft:
This API has been introduced in ICU 2.0. It is still in draft state and may be modified in a future release.
U_MISCELLANEOUS_SYMBOLS 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_DINGBATS 
Draft:
This API has been introduced in ICU 2.0. It is still in draft state and may be modified in a future release.
U_DINGBATS 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_BRAILLE_PATTERNS 
Draft:
This API has been introduced in ICU 2.0. It is still in draft state and may be modified in a future release.
U_BRAILLE_PATTERNS 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_CJK_RADICALS_SUPPLEMENT 
Draft:
This API has been introduced in ICU 2.0. It is still in draft state and may be modified in a future release.
U_CJK_RADICALS_SUPPLEMENT 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_KANGXI_RADICALS 
Draft:
This API has been introduced in ICU 2.0. It is still in draft state and may be modified in a future release.
U_KANGXI_RADICALS 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_IDEOGRAPHIC_DESCRIPTION_CHARACTERS 
Draft:
This API has been introduced in ICU 2.0. It is still in draft state and may be modified in a future release.
U_IDEOGRAPHIC_DESCRIPTION_CHARACTERS 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_CJK_SYMBOLS_AND_PUNCTUATION 
Draft:
This API has been introduced in ICU 2.0. It is still in draft state and may be modified in a future release.
U_CJK_SYMBOLS_AND_PUNCTUATION 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_HIRAGANA 
Draft:
This API has been introduced in ICU 2.0. It is still in draft state and may be modified in a future release.
U_HIRAGANA 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_KATAKANA 
Draft:
This API has been introduced in ICU 2.0. It is still in draft state and may be modified in a future release.
U_KATAKANA 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_BOPOMOFO 
Draft:
This API has been introduced in ICU 2.0. It is still in draft state and may be modified in a future release.
U_BOPOMOFO 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_HANGUL_COMPATIBILITY_JAMO 
Draft:
This API has been introduced in ICU 2.0. It is still in draft state and may be modified in a future release.
U_HANGUL_COMPATIBILITY_JAMO 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_KANBUN 
Draft:
This API has been introduced in ICU 2.0. It is still in draft state and may be modified in a future release.
U_KANBUN 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_BOPOMOFO_EXTENDED 
Draft:
This API has been introduced in ICU 2.0. It is still in draft state and may be modified in a future release.
U_BOPOMOFO_EXTENDED 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_ENCLOSED_CJK_LETTERS_AND_MONTHS 
Draft:
This API has been introduced in ICU 2.0. It is still in draft state and may be modified in a future release.
U_ENCLOSED_CJK_LETTERS_AND_MONTHS 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_CJK_COMPATIBILITY 
Draft:
This API has been introduced in ICU 2.0. It is still in draft state and may be modified in a future release.
U_CJK_COMPATIBILITY 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A 
Draft:
This API has been introduced in ICU 2.0. It is still in draft state and may be modified in a future release.
U_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_CJK_UNIFIED_IDEOGRAPHS 
Draft:
This API has been introduced in ICU 2.0. It is still in draft state and may be modified in a future release.
U_CJK_UNIFIED_IDEOGRAPHS 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_YI_SYLLABLES 
Draft:
This API has been introduced in ICU 2.0. It is still in draft state and may be modified in a future release.
U_YI_SYLLABLES 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_YI_RADICALS 
Draft:
This API has been introduced in ICU 2.0. It is still in draft state and may be modified in a future release.
U_YI_RADICALS 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_HANGUL_SYLLABLES 
Draft:
This API has been introduced in ICU 2.0. It is still in draft state and may be modified in a future release.
U_HANGUL_SYLLABLES 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_HIGH_SURROGATES 
Draft:
This API has been introduced in ICU 2.0. It is still in draft state and may be modified in a future release.
U_HIGH_SURROGATES 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_HIGH_PRIVATE_USE_SURROGATES 
Draft:
This API has been introduced in ICU 2.0. It is still in draft state and may be modified in a future release.
U_HIGH_PRIVATE_USE_SURROGATES 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_LOW_SURROGATES 
Draft:
This API has been introduced in ICU 2.0. It is still in draft state and may be modified in a future release.
U_LOW_SURROGATES 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_PRIVATE_USE_AREA 
Draft:
This API has been introduced in ICU 2.0. It is still in draft state and may be modified in a future release.
U_PRIVATE_USE_AREA 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS 
Draft:
This API has been introduced in ICU 2.0. It is still in draft state and may be modified in a future release.
U_CJK_COMPATIBILITY_IDEOGRAPHS 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_ALPHABETIC_PRESENTATION_FORMS 
Draft:
This API has been introduced in ICU 2.0. It is still in draft state and may be modified in a future release.
U_ALPHABETIC_PRESENTATION_FORMS 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_ARABIC_PRESENTATION_FORMS_A 
Draft:
This API has been introduced in ICU 2.0. It is still in draft state and may be modified in a future release.
U_ARABIC_PRESENTATION_FORMS_A 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_COMBINING_HALF_MARKS 
Draft:
This API has been introduced in ICU 2.0. It is still in draft state and may be modified in a future release.
U_COMBINING_HALF_MARKS 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_CJK_COMPATIBILITY_FORMS 
Draft:
This API has been introduced in ICU 2.0. It is still in draft state and may be modified in a future release.
U_CJK_COMPATIBILITY_FORMS 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_SMALL_FORM_VARIANTS 
Draft:
This API has been introduced in ICU 2.0. It is still in draft state and may be modified in a future release.
U_SMALL_FORM_VARIANTS 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_ARABIC_PRESENTATION_FORMS_B 
Draft:
This API has been introduced in ICU 2.0. It is still in draft state and may be modified in a future release.
U_ARABIC_PRESENTATION_FORMS_B 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_SPECIALS 
Draft:
This API has been introduced in ICU 2.0. It is still in draft state and may be modified in a future release.
U_SPECIALS 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_HALFWIDTH_AND_FULLWIDTH_FORMS 
Draft:
This API has been introduced in ICU 2.0. It is still in draft state and may be modified in a future release.
U_HALFWIDTH_AND_FULLWIDTH_FORMS 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_OLD_ITALIC 
Draft:
This API has been introduced in ICU 2.0. It is still in draft state and may be modified in a future release.
UBLOCK_GOTHIC 
Draft:
This API has been introduced in ICU 2.0. It is still in draft state and may be modified in a future release.
UBLOCK_DESERET 
Draft:
This API has been introduced in ICU 2.0. It is still in draft state and may be modified in a future release.
UBLOCK_BYZANTINE_MUSICAL_SYMBOLS 
Draft:
This API has been introduced in ICU 2.0. It is still in draft state and may be modified in a future release.
UBLOCK_MUSICAL_SYMBOLS 
Draft:
This API has been introduced in ICU 2.0. It is still in draft state and may be modified in a future release.
UBLOCK_MATHEMATICAL_ALPHANUMERIC_SYMBOLS 
Draft:
This API has been introduced in ICU 2.0. It is still in draft state and may be modified in a future release.
UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B 
Draft:
This API has been introduced in ICU 2.0. It is still in draft state and may be modified in a future release.
UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT 
Draft:
This API has been introduced in ICU 2.0. It is still in draft state and may be modified in a future release.
UBLOCK_TAGS 
Draft:
This API has been introduced in ICU 2.0. It is still in draft state and may be modified in a future release.
UBLOCK_PRIVATE_USE 
Draft:
This API has been introduced in ICU 2.0. It is still in draft state and may be modified in a future release.
UBLOCK_COUNT 
Draft:
This API has been introduced in ICU 2.0. It is still in draft state and may be modified in a future release.
U_SCRIPT_COUNT 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_INVALID_CODE 
Draft:
This API has been introduced in ICU 2.0. It is still in draft state and may be modified in a future release.
U_CHAR_SCRIPT_COUNT 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
U_NO_SCRIPT 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002

Definition at line 192 of file uchar.h.

enum UCellWidth
 

Values returned by the u_charCellWidth() function.

Stable:
Enumeration values:
U_ZERO_WIDTH 
Stable:
U_HALF_WIDTH 
Stable:
U_FULL_WIDTH 
Stable:
U_NEUTRAL_WIDTH 
Stable:
U_CELL_WIDTH_COUNT 
Stable:

Definition at line 670 of file uchar.h.

enum UCharCategory
 

Data for enumerated Unicode general category types.

See http://www.unicode.org/Public/UNIDATA/UnicodeData.html .

Stable:
Enumeration values:
U_UNASSIGNED  Non-category for unassigned and non-character code points.

Stable:
U_GENERAL_OTHER_TYPES  Cn "Other, Not Assigned (no characters in [UnicodeData.txt] have this property)" (same as U_UNASSIGNED!).

Draft:
This API has been introduced in ICU 2.0. It is still in draft state and may be modified in a future release.
U_UPPERCASE_LETTER  Lu.

Stable:
U_LOWERCASE_LETTER  Ll.

Stable:
U_TITLECASE_LETTER  Lt.

Stable:
U_MODIFIER_LETTER  Lm.

Stable:
U_OTHER_LETTER  Lo.

Stable:
U_NON_SPACING_MARK  Mn.

Stable:
U_ENCLOSING_MARK  Me.

Stable:
U_COMBINING_SPACING_MARK  Mc.

Stable:
U_DECIMAL_DIGIT_NUMBER  Nd.

Stable:
U_LETTER_NUMBER  Nl.

Stable:
U_OTHER_NUMBER  No.

Stable:
U_SPACE_SEPARATOR  Zs.

Stable:
U_LINE_SEPARATOR  Zl.

Stable:
U_PARAGRAPH_SEPARATOR  Zp.

Stable:
U_CONTROL_CHAR  Cc.

Stable:
U_FORMAT_CHAR  Cf.

Stable:
U_PRIVATE_USE_CHAR  Co.

Stable:
U_SURROGATE  Cs.

Stable:
U_DASH_PUNCTUATION  Pd.

Stable:
U_START_PUNCTUATION  Ps.

Stable:
U_END_PUNCTUATION  Pe.

Stable:
U_CONNECTOR_PUNCTUATION  Pc.

Stable:
U_OTHER_PUNCTUATION  Po.

Stable:
U_MATH_SYMBOL  Sm.

Stable:
U_CURRENCY_SYMBOL  Sc.

Stable:
U_MODIFIER_SYMBOL  Sk.

Stable:
U_OTHER_SYMBOL  So.

Stable:
U_INITIAL_PUNCTUATION  Pi.

Stable:
U_FINAL_PUNCTUATION  Pf.

Stable:
U_CHAR_CATEGORY_COUNT  One higher than the last enum UCharCategory constant.

Stable:

Definition at line 68 of file uchar.h.

enum UCharDirection
 

This specifies the language directional property of a character set.

Stable:
Enumeration values:
U_LEFT_TO_RIGHT  L.

Stable:
U_RIGHT_TO_LEFT  R.

Stable:
U_EUROPEAN_NUMBER  EN.

Stable:
U_EUROPEAN_NUMBER_SEPARATOR  ES.

Stable:
U_EUROPEAN_NUMBER_TERMINATOR  ET.

Stable:
U_ARABIC_NUMBER  AN.

Stable:
U_COMMON_NUMBER_SEPARATOR  CS.

Stable:
U_BLOCK_SEPARATOR  B.

Stable:
U_SEGMENT_SEPARATOR  S.

Stable:
U_WHITE_SPACE_NEUTRAL  WS.

Stable:
U_OTHER_NEUTRAL  ON.

Stable:
U_LEFT_TO_RIGHT_EMBEDDING  LRE.

Stable:
U_LEFT_TO_RIGHT_OVERRIDE  LRO.

Stable:
U_RIGHT_TO_LEFT_ARABIC  AL.

Stable:
U_RIGHT_TO_LEFT_EMBEDDING  RLE.

Stable:
U_RIGHT_TO_LEFT_OVERRIDE  RLO.

Stable:
U_POP_DIRECTIONAL_FORMAT  PDF.

Stable:
U_DIR_NON_SPACING_MARK  NSM.

Stable:
U_BOUNDARY_NEUTRAL  BN.

Stable:
U_CHAR_DIRECTION_COUNT 
Stable:

Definition at line 142 of file uchar.h.

enum UCharNameChoice
 

Selector constants for u_charName().

u_charName() returns either the "modern" name of a Unicode character or the name that was defined in Unicode version 1.0, before the Unicode standard merged with ISO-10646.

See also:
u_charName
Stable:
Enumeration values:
U_UNICODE_CHAR_NAME 
U_UNICODE_10_CHAR_NAME 
U_CHAR_NAME_CHOICE_COUNT 

Definition at line 697 of file uchar.h.


Function Documentation

U_CDECL_BEGIN typedef UBool UEnumCharNamesFn void *    context,
UChar32    code,
UCharNameChoice    nameChoice,
const char *    name,
UTextOffset    length
 

Type of a callback function for u_enumCharNames() that gets called for each Unicode character with the code point value and the character name.

If such a function returns FALSE, then the enumeration is stopped.

Parameters:
context  The context pointer that was passed to u_enumCharNames().
code  The Unicode code point for the character with this name.
nameChoice  Selector for which kind of names is enumerated.
name  The character's name, zero-terminated.
length  The length of the name.
Returns:
TRUE if the enumeration should continue, FALSE to stop it.
See also:
UCharNameChoice , u_enumCharNames

U_CAPI uint16_t U_EXPORT2 u_charCellWidth UChar32    c
 

Returns a value indicating the display-cell width of the character when used in Asian text, according to the Unicode standard (see p. 6-130 of The Unicode Standard, Version 2.0).

The results for various characters are as follows:

ZERO_WIDTH: Characters which are considered to take up no display-cell space: control characters format characters line and paragraph separators non-spacing marks combining Hangul jungseong combining Hangul jongseong unassigned Unicode values

HALF_WIDTH: Characters which take up half a cell in standard Asian text: all characters in the General Scripts Area except combining Hangul choseong and the characters called out specifically above as ZERO_WIDTH alphabetic and Arabic presentation forms halfwidth CJK punctuation halfwidth Katakana halfwidth Hangul Jamo halfwidth forms, arrows, and shapes

FULL_WIDTH: Characters which take up a full cell in standard Asian text: combining Hangul choseong all characters in the CJK Phonetics and Symbols Area all characters in the CJK Ideographs Area all characters in the Hangul Syllables Area CJK compatibility ideographs CJK compatibility forms small form variants fullwidth ASCII fullwidth punctuation and currency signs

NEUTRAL: Characters whose cell width is context-dependent: all characters in the Symbols Area, except those specifically called out above all characters in the Surrogates Area all characters in the Private Use Area

For Korean text, this algorithm should work properly with properly normalized Korean text. Precomposed Hangul syllables and non-combining jamo are all considered full-width characters. For combining jamo, we treat choseong (initial consonants) as double-width characters and jungseong (vowels) and jongseong (final consonants) as non-spacing marks. This will work right in text that uses the precomposed choseong characters instead of two choseong characters in a row, and which uses the choseong filler character at the beginning of syllables that don't have an initial consonant. The results may be slightly off with Korean text following different conventions.

Stable:

U_CAPI int32_t U_EXPORT2 u_charDigitValue UChar32    c
 

Retrieves the decimal numeric value of a digit character.

Parameters:
c  the digit character for which to get the numeric value
Returns:
the numeric value of c in decimal radix. This method returns -1 if c is not a valid digit character.
Stable:

U_CAPI UCharDirection U_EXPORT2 u_charDirection UChar32    c
 

Returns the linguistic direction property of a character.

Returns the linguistic direction property of a character. For example, 0x0041 (letter A) has the LEFT_TO_RIGHT directional property.

See also:
UCharDirection
Stable:

U_CAPI UChar32 U_EXPORT2 u_charFromName UCharNameChoice    nameChoice,
const char *    name,
UErrorCode *    pErrorCode
 

Find a Unicode character by its name and return its code point value.

The name is matched exactly and completely. A Unicode 1.0 name is matched only if it differs from the modern name. Unicode names are all uppercase.

Parameters:
nameChoice  Selector for which name to match.
name  The name to match.
pErrorCode  Pointer to a UErrorCode variable
Returns:
The Unicode code point value of the character with the given name, or 0xffff if there is no such character.
See also:
UCharNameChoice , u_charName , u_enumCharNames

U_CAPI UChar32 U_EXPORT2 u_charMirror UChar32    c
 

Maps the specified character to a "mirror-image" character.

For characters with the "mirrored" property, implementations sometimes need a "poor man's" mapping to another Unicode character (code point) such that the default glyph may serve as the mirror-image of the default glyph of the specified character. This is useful for text conversion to and from codepages with visual order, and for displays without glyph selection capabilities.

Parameters:
c  the character (code point, Unicode scalar value) to be mapped
Returns:
another Unicode code point that may serve as a mirror-image substitute, or c itself if there is no such mapping or c does not have the "mirrored" property
Stable:

U_CAPI UTextOffset U_EXPORT2 u_charName UChar32    code,
UCharNameChoice    nameChoice,
char *    buffer,
UTextOffset    bufferLength,
UErrorCode *    pErrorCode
 

Retrieve the name of a Unicode character.

Depending on nameChoice, the character name written into the buffer is the "modern" name or the name that was defined in Unicode version 1.0. The name contains only "invariant" characters like A-Z, 0-9, space, and '-'. Unicode 1.0 names are only retrieved if they are different from the modern names and if the data file contains the data for them. gennames may or may not be called with a command line option to include 1.0 names in unames.dat.

Parameters:
code  The character (code point) for which to get the name. It must be 0<=code<=0x10ffff.
nameChoice  Selector for which name to get.
buffer  Destination address for copying the name. The name will always be zero-terminated. If there is no name, then the buffer will be set to the empty string.
bufferLength  ==sizeof(buffer)
pErrorCode  Pointer to a UErrorCode variable; check for U_SUCCESS() after u_charName() returns.
Returns:
The length of the name, or 0 if there is no name for this character. If the bufferLength is less than or equal to the length, then the buffer contains the truncated name and the returned length indicates the full length of the name. The length does not include the zero-termination.
See also:
UCharNameChoice , u_charFromName , u_enumCharNames
Stable:

U_CAPI int8_t U_EXPORT2 u_charType UChar32    c
 

Returns a value indicating a character category according to UnicodeData.txt.

Parameters:
c  the character to be tested
Returns:
a value of type int, the character category.
See also:
UCharCategory
Stable:

U_CAPI int32_t U_EXPORT2 u_digit UChar32    ch,
int8_t    radix
 

Returns the numeric value of the character ch in the specified radix.

If the radix is not in the range 2 <= radix <= 36 or if the value of ch is not a valid digit in the specified radix, -1 is returned. A character is a valid digit if at least one of the following is true:

  • The method u_isdigit is true of the character and the Unicode decimal digit value of the character (or its single-character decomposition) is less than the specified radix. In this case the decimal digit value is returned.
  • The character is one of the uppercase Latin letters 'A' through 'Z' and its code is less than radix + 'A' - 10. In this case, ch - 'A' + 10 is returned.
  • The character is one of the lowercase Latin letters 'a' through 'z' and its code is less than radix + 'a' - 10. In this case, ch - 'a' + 10 is returned.
Parameters:
ch  the character to be converted.
radix  the radix.
Returns:
the numeric value represented by the character in the specified radix.
See also:
u_forDigit , u_charDigitValue , u_isdigit
Draft:
This API has been introduced in ICU 2.0. It is still in draft state and may be modified in a future release.

U_CDECL_END U_CAPI void U_EXPORT2 u_enumCharNames UChar32    start,
UChar32    limit,
UEnumCharNamesFn *    fn,
void *    context,
UCharNameChoice    nameChoice,
UErrorCode *    pErrorCode
 

Enumerate all assigned Unicode characters between the start and limit code points (start inclusive, limit exclusive) and call a function for each, passing the code point value and the character name.

For Unicode 1.0 names, only those are enumerated that differ from the modern names.

Parameters:
start  The first code point in the enumeration range.
limit  One more than the last code point in the enumeration range (the first one after the range).
fn  The function that is to be called for each character name.
context  An arbitrary pointer that is passed to the function.
nameChoice  Selector for which kind of names to enumerate.
pErrorCode  Pointer to a UErrorCode variable
See also:
UCharNameChoice , UEnumCharNamesFn , u_charName , u_charFromName

U_CAPI UChar32 U_EXPORT2 u_foldCase UChar32    c,
uint32_t    options
 

The given character is mapped to its case folding equivalent according to UnicodeData.txt and CaseFolding.txt; if the character has no case folding equivalent, the character itself is returned.

Only "simple", single-code point case folding mappings are used. "Full" mappings are used by u_strFoldCase().

Parameters:
c  the character to be converted
options  Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I
Returns:
the case folding equivalent of the character, if any; otherwise the character itself.
Draft:
This API has been introduced in ICU 1.8. It is still in draft state and may be modified in a future release.

U_CAPI UChar32 U_EXPORT2 u_forDigit int32_t    digit,
int8_t    radix
 

Determines the character representation for a specific digit in the specified radix.

If the value of radix is not a valid radix, or the value of digit is not a valid digit in the specified radix, the null character (U+0000) is returned.

The radix argument is valid if it is greater than or equal to 2 and less than or equal to 36. The digit argument is valid if 0 <= digit < radix.

If the digit is less than 10, then '0' + digit is returned. Otherwise, the value 'a' + digit - 10 is returned.

Parameters:
digit  the number to convert to a character.
radix  the radix.
Returns:
the char representation of the specified digit in the specified radix.
See also:
u_digit , u_charDigitValue , u_isdigit
Draft:
This API has been introduced in ICU 2.0. It is still in draft state and may be modified in a future release.

U_CAPI uint8_t U_EXPORT2 u_getCombiningClass UChar32    c
 

Returns the combining class of the code point as specified in UnicodeData.txt.

Parameters:
c  the code point of the character
Returns:
the combining class of the character
Stable:

U_CAPI void U_EXPORT2 u_getUnicodeVersion UVersionInfo    info
 

Gets the Unicode version information.

The version array stores the version information for the Unicode standard that is currently used by ICU. For example, release "1.3.31.2" is then represented as 0x01031F02.

Parameters:
info  the version # information, the result will be filled in
Stable:

U_CAPI UBool U_EXPORT2 u_isIDIgnorable UChar32    c
 

A convenience method for determining if a Unicode character should be regarded as an ignorable character in a Unicode identifier.

The following Unicode characters are ignorable in a Unicode identifier:
0x0000 through 0x0008, ISO control characters that
0x000E through 0x001B, are not whitespace
and 0x007F through 0x009F
0x200C through 0x200F join controls
0x202A through 0x202E bidirectional controls
0x206A through 0x206F format controls
0xFEFF zero-width no-break space

Parameters:
c  the Unicode character.
Returns:
TRUE if the character is ignorable in a Unicode identifier; FALSE otherwise.
See also:
u_isIDPart
Stable:

U_CAPI UBool U_EXPORT2 u_isIDPart UChar32    c
 

A convenience method for determining if a Unicode character may be part of a Unicode identifier other than the starting character.

A character may be part of a Unicode identifier if and only if it is one of the following:

  • a letter
  • a connecting punctuation character (such as "_").
  • a digit
  • a numeric letter (such as a Roman numeral character)
  • a combining mark
  • a non-spacing mark
  • an ignorable control character
Parameters:
c  the Unicode character.
Returns:
TRUE if the character may be part of a Unicode identifier; FALSE otherwise.
See also:
u_isIDIgnorable , u_isIDStart
Stable:

U_CAPI UBool U_EXPORT2 u_isIDStart UChar32    c
 

A convenience method for determining if a Unicode character is allowed to start in a Unicode identifier.

A character may start a Unicode identifier if and only if it is a letter.

Parameters:
c  the Unicode character.
Returns:
TRUE if the character may start a Unicode identifier; FALSE otherwise.
See also:
u_isalpha , u_isIDPart
Stable:

U_CAPI UBool U_EXPORT2 u_isJavaIDPart UChar32    c
 

A convenience method for determining if a Unicode character may be part of a Java identifier other than the starting character.

A character may be part of a Java identifier if and only if it is one of the following:

  • a letter
  • a currency symbol (such as "$")
  • a connecting punctuation character (such as "_").
  • a digit
  • a numeric letter (such as a Roman numeral character)
  • a combining mark
  • a non-spacing mark
  • an ignorable control character
Parameters:
c  the Unicode character.
Returns:
TRUE if the character may be part of a Java identifier; FALSE otherwise.
See also:
u_isIDIgnorable , u_isJavaIDStart , u_isalpha , u_isdigit , u_isIDPart
Stable:

U_CAPI UBool U_EXPORT2 u_isJavaIDStart UChar32    c
 

A convenience method for determining if a Unicode character is allowed as the first character in a Java identifier.

A character may start a Java identifier if and only if it is one of the following:

  • a letter
  • a currency symbol (such as "$")
  • a connecting punctuation symbol (such as "_").
Parameters:
c  the Unicode character.
Returns:
TRUE if the character may start a Java identifier; FALSE otherwise.
See also:
u_isJavaIDPart , u_isalpha , u_isIDStart
Stable:

U_CAPI UBool U_EXPORT2 u_isMirrored UChar32    c
 

Determines whether the character has the "mirrored" property.

This property is set for characters that are commonly used in Right-To-Left contexts and need to be displayed with a "mirrored" glyph.

Parameters:
c  the character (code point, Unicode scalar value) to be tested
Returns:
TRUE if the character has the "mirrored" property
Stable:

U_CAPI UBool U_EXPORT2 u_isWhitespace UChar32    c
 

Determines if the specified character is white space according to ICU.

A character is considered to be an ICU whitespace character if and only if it satisfies one of the following criteria:

  • It is a Unicode space separator (category "Zs"), but is not a no-break space (\u00A0 or \uFEFF).
  • It is a Unicode line separator (category "Zl").
  • It is a Unicode paragraph separator (category "Zp").
  • It is \u0009, HORIZONTAL TABULATION.
  • It is \u000A, LINE FEED.
  • It is \u000B, VERTICAL TABULATION.
  • It is \u000C, FORM FEED.
  • It is \u000D, CARRIAGE RETURN.
  • It is \u001C, FILE SEPARATOR.
  • It is \u001D, GROUP SEPARATOR.
  • It is \u001E, RECORD SEPARATOR.
  • It is \u001F, UNIT SEPARATOR.
Note: This method corresponds to the Java method java.lang.Character.isWhitespace().
Parameters:
c  the character to be tested.
Returns:
true if the character is an ICU whitespace character; false otherwise.
See also:
u_isspace
Stable:

U_CAPI UBool U_EXPORT2 u_isalnum UChar32    c
 

Determines whether the specified character is an alphanumeric character (letter or digit) according to UnicodeData.txt.

Parameters:
c  the character to be tested
Returns:
true if the character is a letter or a digit; false otherwise.
Stable:

U_CAPI UBool U_EXPORT2 u_isalpha UChar32    c
 

Determines whether the specified character is a letter according to UnicodeData.txt.

Parameters:
c  the character to be tested
Returns:
true if the character is a letter; false otherwise.
See also:
u_isdigit , u_isalnum
Stable:

U_CAPI UBool U_EXPORT2 u_isbase UChar32    c
 

Determines whether the specified character is of the base form according to UnicodeData.txt.

Parameters:
c  the character to be tested
Returns:
true if the Unicode character is of the base form; false otherwise.
See also:
u_isalpha , u_isdigit
Stable:

U_CAPI UBool U_EXPORT2 u_iscntrl UChar32    c
 

Determines whether the specified character is a control character or not.

A control character is one of the following:

  • ISO 8-bit control character (U+0000..U+001f and U+007f..U+009f)
  • U_CONTROL_CHAR (Cc)
  • U_FORMAT_CHAR (Cf)
  • U_LINE_SEPARATOR (Zl)
  • U_PARAGRAPH_SEPARATOR (Zp)
Parameters:
c  the character to be tested
Returns:
true if the Unicode character is a control character; false otherwise.
See also:
u_isprint
Stable:

U_CAPI UBool U_EXPORT2 u_isdefined UChar32    c
 

Determines whether the specified numeric value is actually a defined character according to UnicodeData.txt.

Parameters:
c  the character to be tested
Returns:
true if the character has a defined Unicode meaning; false otherwise.
See also:
u_isdigit , u_isalpha , u_isalnum , u_isupper , u_islower , u_istitle
Stable:

U_CAPI UBool U_EXPORT2 u_isdigit UChar32    c
 

Determines whether the specified character is a digit according to UnicodeData.txt.

Parameters:
c  the character to be tested
Returns:
true if the character is a digit; false otherwise.
Stable:

U_CAPI UBool U_EXPORT2 u_islower UChar32    c
 

Determines whether the specified UChar is a lowercase character according to UnicodeData.txt.

Parameters:
c  the character to be tested
Returns:
true if the character is lowercase; false otherwise.
See also:
UNICODE_VERSION , u_isupper , u_istitle , u_islower
Stable:

U_CAPI UBool U_EXPORT2 u_isprint UChar32    c
 

Determines whether the specified character is a printable character according to UnicodeData.txt.

Parameters:
c  the character to be tested
Returns:
true if the Unicode character is a printable character; false otherwise.
See also:
u_iscntrl
Stable:

U_CAPI UBool U_EXPORT2 u_isspace UChar32    c
 

Determines if the specified character is a space character or not.

Parameters:
c  the character to be tested
Returns:
true if the character is a space character; false otherwise.
Stable:

U_CAPI UBool U_EXPORT2 u_istitle UChar32    c
 

Determines whether the specified character is a titlecase character according to UnicodeData.txt.

Parameters:
c  the character to be tested
Returns:
true if the character is titlecase; false otherwise.
See also:
u_isupper , u_islower , u_totitle
Stable:

U_CAPI UBool U_EXPORT2 u_isupper UChar32    c
 

Determines whether the specified character is an uppercase character according to UnicodeData.txt.

Parameters:
c  the character to be tested
Returns:
true if the character is uppercase; false otherwise.
See also:
u_islower , u_istitle , u_tolower
Stable:

U_CAPI UChar32 U_EXPORT2 u_tolower UChar32    c
 

The given character is mapped to its lowercase equivalent according to UnicodeData.txt; if the character has no lowercase equivalent, the character itself is returned.

A character has a lowercase equivalent if and only if a lowercase mapping is specified for the character in the UnicodeData.txt attribute table.

u_tolower() only deals with the general letter case conversion. For language-specific case conversion behavior, use u_strToLower(). For example, the case conversion for dot-less i and dotted I in Turkish, or for final sigma in Greek.

Parameters:
c  the character to be converted
Returns:
the lowercase equivalent of the character, if any; otherwise the character itself.
Stable:

U_CAPI UChar32 U_EXPORT2 u_totitle UChar32    c
 

The given character is mapped to its titlecase equivalent according to UnicodeData.txt.

There are only four Unicode characters that are truly titlecase forms that are distinct from uppercase forms. As a rule, if a character has no true titlecase equivalent, its uppercase equivalent is returned.

A character has a titlecase equivalent if and only if a titlecase mapping is specified for the character in the UnicodeData.txt data.

Parameters:
c  the character to be converted
Returns:
the titlecase equivalent of the character, if any; otherwise the character itself.
Stable:

U_CAPI UChar32 U_EXPORT2 u_toupper UChar32    c
 

The given character is mapped to its uppercase equivalent according to UnicodeData.txt; if the character has no uppercase equivalent, the character itself is returned.

u_toupper() only deals with the general letter case conversion. For language-specific case conversion behavior, use u_strToUpper(). For example, the case conversion for dot-less i and dotted I in Turkish, or ess-zed (i.e., "sharp S") in German.

Parameters:
c  the character to be converted
Returns:
the uppercase equivalent of the character, if any; otherwise the character itself.
Stable:

U_CAPI UBlockCode U_EXPORT2 ublock_getCode UChar32    ch
 

Returns the Unicode allocation block that contains the character.

See also:
UBlockCode
Draft:
This API has been introduced in ICU 2.0. It is still in draft state and may be modified in a future release.


Generated on Mon Dec 3 19:00:32 2001 for ICU 2.0 by doxygen1.2.11.1 written by Dimitri van Heesch, © 1997-2001