00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027 #ifndef UNICODE_H
00028 #define UNICODE_H
00029
00030 #include "unicode/utypes.h"
00031 #include "unicode/uchar.h"
00032
00033 #ifdef ICU_UNICODE_CLASS_USE_DEPRECATES
00034 U_NAMESPACE_BEGIN
/**
 * Unicode: a class of static convenience functions layered over the ICU C
 * character API (unicode/uchar.h) and the UTF_* macros.
 *
 * Every public function is static. The constructors, the destructor and the
 * assignment operator are protected, so code outside this class (and any
 * subclasses) cannot create, copy or destroy instances.
 *
 * This class is only declared when ICU_UNICODE_CLASS_USE_DEPRECATES is
 * defined; otherwise this header stops with an #error that points callers
 * to the C API in unicode/uchar.h.
 */
00056 class U_COMMON_API Unicode
00057 {
00058 public:
00059
00060
00061
00062
00063
00064
00065
    /**
     * Numeric limits exposed as class-scope constants.
     * Presumably MIN_VALUE/MAX_VALUE bound the code point range and
     * MIN_RADIX/MAX_RADIX bound the radix accepted by digit()/forDigit()
     * (name-based reading; confirm against the C API documentation).
     * MAX_CHAR_LENGTH simply re-exports UTF_MAX_CHAR_LENGTH.
     */
00066 enum {
00068 MIN_VALUE=0,
00069
00075 MAX_VALUE=0x10ffff,
00076
00084 MAX_CHAR_LENGTH=UTF_MAX_CHAR_LENGTH,
00085
00096 MIN_RADIX=2,
00097
00108 MAX_RADIX=36
00109 };
00110
    /**
     * Character type (general category) constants, explicitly numbered
     * 0 through 29. GENERAL_TYPES_COUNT (30) equals the number of type
     * constants listed before it.
     * NOTE(review): getType() returns the raw int8_t result of u_charType();
     * presumably its values line up with these constants -- confirm.
     */
00117 enum EUnicodeGeneralTypes
00118 {
00119 UNASSIGNED = 0,
00120 UPPERCASE_LETTER = 1,
00121 LOWERCASE_LETTER = 2,
00122 TITLECASE_LETTER = 3,
00123 MODIFIER_LETTER = 4,
00124 OTHER_LETTER = 5,
00125 NON_SPACING_MARK = 6,
00126 ENCLOSING_MARK = 7,
00127 COMBINING_SPACING_MARK = 8,
00128 DECIMAL_DIGIT_NUMBER = 9,
00129 LETTER_NUMBER = 10,
00130 OTHER_NUMBER = 11,
00131 SPACE_SEPARATOR = 12,
00132 LINE_SEPARATOR = 13,
00133 PARAGRAPH_SEPARATOR = 14,
00134 CONTROL = 15,
00135 FORMAT = 16,
00136 PRIVATE_USE = 17,
00137 SURROGATE = 18,
00138 DASH_PUNCTUATION = 19,
00139 START_PUNCTUATION = 20,
00140 END_PUNCTUATION = 21,
00141 CONNECTOR_PUNCTUATION = 22,
00142 OTHER_PUNCTUATION = 23,
00143 MATH_SYMBOL = 24,
00144 CURRENCY_SYMBOL = 25,
00145 MODIFIER_SYMBOL = 26,
00146 OTHER_SYMBOL = 27,
00147 INITIAL_PUNCTUATION = 28,
00148 FINAL_PUNCTUATION = 29,
00149 GENERAL_TYPES_COUNT = 30
00150 };
00151
00152
    /**
     * Result type of getScript(), which casts the value returned by
     * u_charScript() to this enum.
     * The first enumerator is pinned to UBLOCK_BASIC_LATIN; every following
     * enumerator without an initializer takes the next consecutive value.
     * kScriptCount is pinned separately to UBLOCK_COUNT.
     * NOTE(review): the numeric values are only meaningful if the enumerator
     * order here matches the order of the UBLOCK_* constants in
     * unicode/uchar.h -- confirm before inserting or reordering entries.
     */
00158 enum EUnicodeScript
00159 {
00160 kBasicLatin=UBLOCK_BASIC_LATIN,
00161 kLatin1Supplement,
00162 kLatinExtendedA,
00163 kLatinExtendedB,
00164 kIPAExtension,
00165 kSpacingModifier,
00166 kCombiningDiacritical,
00167 kGreek,
00168 kCyrillic,
00169 kArmenian,
00170 kHebrew,
00171 kArabic,
00172 kSyriac,
00173 kThaana,
00174 kDevanagari,
00175 kBengali,
00176 kGurmukhi,
00177 kGujarati,
00178 kOriya,
00179 kTamil,
00180 kTelugu,
00181 kKannada,
00182 kMalayalam,
00183 kSinhala,
00184 kThai,
00185 kLao,
00186 kTibetan,
00187 kMyanmar,
00188 kGeorgian,
00189 kHangulJamo,
00190 kEthiopic,
00191 kCherokee,
00192 kUnifiedCanadianAboriginalSyllabics,
    // NOTE(review): spelled with a lowercase 'o', unlike every neighbouring
    // enumerator; looks like a typo for kOgham. Left as is because renaming
    // a public enumerator would break existing callers.
00193 kogham,
00194 kRunic,
00195 kKhmer,
00196 kMongolian,
00197 kLatinExtendedAdditional,
00198 kGreekExtended,
00199 kGeneralPunctuation,
00200 kSuperSubScript,
00201 kCurrencySymbolScript,
00202 kSymbolCombiningMark,
00203 kLetterlikeSymbol,
00204 kNumberForm,
00205 kArrow,
00206 kMathOperator,
00207 kMiscTechnical,
00208 kControlPicture,
00209 kOpticalCharacter,
00210 kEnclosedAlphanumeric,
00211 kBoxDrawing,
00212 kBlockElement,
00213 kGeometricShape,
00214 kMiscSymbol,
00215 kDingbat,
00216 kBraillePatterns,
00217 kCJKRadicalsSupplement,
00218 kKangxiRadicals,
00219 kIdeographicDescriptionCharacters,
00220 kCJKSymbolPunctuation,
00221 kHiragana,
00222 kKatakana,
00223 kBopomofo,
00224 kHangulCompatibilityJamo,
00225 kKanbun,
00226 kBopomofoExtended,
00227 kEnclosedCJKLetterMonth,
00228 kCJKCompatibility,
00229 kCJKUnifiedIdeographExtensionA,
00230 kCJKUnifiedIdeograph,
00231 kYiSyllables,
00232 kYiRadicals,
00233 kHangulSyllable,
00234 kHighSurrogate,
00235 kHighPrivateUseSurrogate,
00236 kLowSurrogate,
00237 kPrivateUse,
00238 kCJKCompatibilityIdeograph,
00239 kAlphabeticPresentation,
00240 kArabicPresentationA,
00241 kCombiningHalfMark,
00242 kCJKCompatibilityForm,
00243 kSmallFormVariant,
00244 kArabicPresentationB,
00245 kNoScript,
00246 kHalfwidthFullwidthForm,
00247 kScriptCount=UBLOCK_COUNT
00248 };
00249
    /**
     * Result type of characterDirection(), which casts the value returned
     * by u_charDirection() to this enum. Values are explicitly numbered
     * 0 through 18.
     * NOTE(review): presumably these numbers mirror the C API's direction
     * constants -- confirm in unicode/uchar.h.
     */
00255 enum EDirectionProperty {
00256 LEFT_TO_RIGHT = 0,
00257 RIGHT_TO_LEFT = 1,
00258 EUROPEAN_NUMBER = 2,
00259 EUROPEAN_NUMBER_SEPARATOR = 3,
00260 EUROPEAN_NUMBER_TERMINATOR = 4,
00261 ARABIC_NUMBER = 5,
00262 COMMON_NUMBER_SEPARATOR = 6,
00263 BLOCK_SEPARATOR = 7,
00264 SEGMENT_SEPARATOR = 8,
00265 WHITE_SPACE_NEUTRAL = 9,
00266 OTHER_NEUTRAL = 10,
00267 LEFT_TO_RIGHT_EMBEDDING = 11,
00268 LEFT_TO_RIGHT_OVERRIDE = 12,
00269 RIGHT_TO_LEFT_ARABIC = 13,
00270 RIGHT_TO_LEFT_EMBEDDING = 14,
00271 RIGHT_TO_LEFT_OVERRIDE = 15,
00272 POP_DIRECTIONAL_FORMAT = 16,
00273 DIR_NON_SPACING_MARK = 17,
00274 BOUNDARY_NEUTRAL = 18
00275 };
00276
    /**
     * Cell width constants, numbered 0 through 3.
     * NOTE(review): getCellWidth() returns a plain uint16_t taken from
     * u_charCellWidth(); presumably it is one of these values -- confirm.
     */
00283 enum ECellWidths
00284 {
00285 ZERO_WIDTH = 0,
00286 HALF_WIDTH = 1,
00287 FULL_WIDTH = 2,
00288 NEUTRAL = 3
00289 };
00290
    // ---- Code unit / code point helpers -------------------------------
    // Each of the next ten functions is defined inline further down in this
    // header as a direct forward to the UTF_* macro named in its comment.

    // Forwards to UTF_IS_SINGLE(c).
00302 static inline UBool isSingle(UChar c);
00303
    // Forwards to UTF_IS_LEAD(c).
00313 static inline UBool isLead(UChar c);
00314
    // Forwards to UTF_IS_TRAIL(c).
00324 static inline UBool isTrail(UChar c);
00325
    // Forwards to UTF_IS_SURROGATE(c).
00337 static inline UBool isSurrogate(UChar32 c);
00338
    // Forwards to UTF_IS_UNICODE_CHAR(c).
00352 static inline UBool isUnicodeChar(UChar32 c);
00353
    // Forwards to UTF_IS_ERROR(c).
00366 static inline UBool isError(UChar32 c);
00367
    // Forwards to UTF_IS_VALID(c).
00378 static inline UBool isValid(UChar32 c);
00379
    // Forwards to UTF_NEED_MULTIPLE_UCHAR(c).
00392 static inline UBool needMultipleUChar(UChar32 c);
00393
    // Forwards to UTF_CHAR_LENGTH(c).
00403 static inline int32_t charLength(UChar32 c);
00404
    // Forwards to UTF_ARRAY_SIZE(size).
00419 static inline int32_t arraySize(int32_t size);
00420
    // ---- Character classification --------------------------------------
    // Each function below is defined inline further down in this header as
    // a direct forward to the C API function named in its comment.

    // Forwards to u_islower(ch).
00434 static inline UBool isLowerCase(UChar32 ch);
00435
    // Forwards to u_isupper(ch).
00448 static inline UBool isUpperCase(UChar32 ch);
00449
    // Forwards to u_istitle(ch).
00462 static inline UBool isTitleCase(UChar32 ch);
00463
    // Forwards to u_isdigit(ch).
00476 static inline UBool isDigit(UChar32 ch);
00477
    // Forwards to u_isdefined(ch).
00494 static inline UBool isDefined(UChar32 ch);
00495
    // Forwards to u_iscntrl(ch).
00507 static inline UBool isControl(UChar32 ch);
00508
    // Forwards to u_isprint(ch).
00520 static inline UBool isPrintable(UChar32 ch);
00521
    // Forwards to u_isbase(ch).
00534 static inline UBool isBaseForm(UChar32 ch);
00535
    // Forwards to u_isalpha(ch).
00552 static inline UBool isLetter(UChar32 ch);
00553
    // Forwards to u_isJavaIDStart(ch).
00575 static inline UBool isJavaIdentifierStart(UChar32 ch);
00576
    // Forwards to u_isJavaIDPart(ch).
00606 static inline UBool isJavaIdentifierPart(UChar32 ch);
00607
    // Forwards to u_isIDStart(ch).
00623 static inline UBool isUnicodeIdentifierStart(UChar32 ch);
00624
    // Forwards to u_isIDPart(ch).
00652 static inline UBool isUnicodeIdentifierPart(UChar32 ch);
00653
    // Forwards to u_isIDIgnorable(ch).
00680 static inline UBool isIdentifierIgnorable(UChar32 ch);
00681
    // ---- Case mapping ----------------------------------------------------

    // Forwards to u_tolower(ch).
00707 static inline UChar32 toLowerCase(UChar32 ch);
00708
    // Forwards to u_toupper(ch).
00731 static inline UChar32 toUpperCase(UChar32 ch);
00732
    // Forwards to u_totitle(ch).
00751 static inline UChar32 toTitleCase(UChar32 ch);
00752
    // Forwards to u_foldCase(c, options); options is passed through as is.
00767 static inline UChar32
00768 foldCase(UChar32 c, uint32_t options);
00769
    // ---- White space -----------------------------------------------------
    // Note that the two functions forward to two different C functions.

    // Forwards to u_isspace(ch).
00779 static inline UBool isSpaceChar(UChar32 ch);
00780
    // Forwards to u_isWhitespace(ch).
00810 static inline UBool isWhitespace(UChar32 ch);
00811
    // ---- Other character properties --------------------------------------

    // Forwards to u_charType(ch); the result is returned as int8_t.
00847 static inline int8_t getType(UChar32 ch);
00848
    // Forwards to u_getCombiningClass(c).
00857 static inline uint8_t getCombiningClass(UChar32 c);
00858
    // Forwards to u_charDirection(ch), cast to EDirectionProperty.
00871 static inline EDirectionProperty characterDirection(UChar32 ch);
00872
    // Forwards to u_isMirrored(c).
00884 static inline UBool isMirrored(UChar32 c);
00885
    // Forwards to u_charMirror(c).
00903 static inline UChar32 charMirror(UChar32 c);
00904
    // Forwards to u_charScript(ch), cast to EUnicodeScript.
00913 static inline EUnicodeScript getScript(UChar32 ch);
00914
    // Forwards to u_charCellWidth(ch).
00968 static inline uint16_t getCellWidth(UChar32 ch);
00969
    // Calls u_charName() with a local error code; returns the length it
    // reports on success and 0 on failure. nameChoice defaults to
    // U_UNICODE_CHAR_NAME.
00998 static inline int32_t
00999 getCharName(uint32_t code,
01000 char *buffer, int32_t bufferLength,
01001 UCharNameChoice nameChoice=U_UNICODE_CHAR_NAME);
01002
    // ---- Digit conversion ------------------------------------------------

    // Forwards to u_charDigitValue(ch).
01014 static inline int32_t digitValue(UChar32 ch);
01015
    // Forwards to u_digit(ch, radix).
01054 static inline int32_t digit(UChar32 ch, int8_t radix);
01055
    // Forwards to u_forDigit(digit, radix).
01084 static inline UChar32 forDigit(int32_t digit, int8_t radix);
01085
    // Forwards to u_getUnicodeVersion(info). Declared here without
    // `inline`, but the definition later in this header is marked inline.
01092 static void getUnicodeVersion(UVersionInfo info);
01093
01094 protected:
01095
01096
01097
01098
01099
    // Protected special members: only declared here, no definitions are
    // visible in this header. Presumably they exist solely to keep clients
    // from instantiating or copying this all-static class -- confirm that
    // definitions exist elsewhere if a subclass ever needs them.
01100 Unicode();
01101
01102
01103
01104 Unicode(const Unicode &other);
01105 ~Unicode();
01106
01107
01108
01109
01110 const Unicode &operator=(const Unicode &other);
01111 };
01112
01113
01114
/**
 * Returns the result of the UTF_IS_SINGLE macro applied to the code unit c.
 * Presumably true when c alone represents a complete code point
 * (name-based reading; confirm against the macro's definition).
 */
01115 inline UBool
01116 Unicode::isSingle(UChar c) {
01117 return UTF_IS_SINGLE(c);
01118 }
01119
/**
 * Returns the result of the UTF_IS_LEAD macro applied to the code unit c.
 * Presumably true when c is the first unit of a multi-unit sequence
 * (name-based reading; confirm against the macro's definition).
 */
01120 inline UBool
01121 Unicode::isLead(UChar c) {
01122 return UTF_IS_LEAD(c);
01123 }
01124
/**
 * Returns the result of the UTF_IS_TRAIL macro applied to the code unit c.
 * Presumably true when c is a trailing unit of a multi-unit sequence
 * (name-based reading; confirm against the macro's definition).
 */
01125 inline UBool
01126 Unicode::isTrail(UChar c) {
01127 return UTF_IS_TRAIL(c);
01128 }
01129
/**
 * Returns the result of the UTF_IS_SURROGATE macro applied to c.
 * Unlike isSingle()/isLead()/isTrail(), this takes a UChar32 rather than
 * a UChar.
 */
01130 inline UBool
01131 Unicode::isSurrogate(UChar32 c) {
01132 return UTF_IS_SURROGATE(c);
01133 }
01134
/**
 * Returns the result of the UTF_IS_UNICODE_CHAR macro applied to c.
 * The exact set of code points accepted is defined by that macro, not here.
 */
01135 inline UBool
01136 Unicode::isUnicodeChar(UChar32 c) {
01137 return UTF_IS_UNICODE_CHAR(c);
01138 }
01139
/**
 * Returns the result of the UTF_IS_ERROR macro applied to c.
 * Presumably identifies the error values produced by the UTF macros
 * (name-based reading; confirm against the macro's definition).
 */
01140 inline UBool
01141 Unicode::isError(UChar32 c) {
01142 return UTF_IS_ERROR(c);
01143 }
01144
/**
 * Returns the result of the UTF_IS_VALID macro applied to c.
 * The validity criteria are those of the macro; none are applied here.
 */
01145 inline UBool
01146 Unicode::isValid(UChar32 c) {
01147 return UTF_IS_VALID(c);
01148 }
01149
/**
 * Returns the result of the UTF_NEED_MULTIPLE_UCHAR macro applied to c.
 * Presumably true when c cannot be stored in a single UChar
 * (name-based reading; confirm against the macro's definition).
 */
01150 inline UBool
01151 Unicode::needMultipleUChar(UChar32 c) {
01152 return UTF_NEED_MULTIPLE_UCHAR(c);
01153 }
01154
/**
 * Returns the result of the UTF_CHAR_LENGTH macro applied to c, as int32_t.
 * Presumably the number of UChar units needed to store c
 * (name-based reading; confirm against the macro's definition).
 */
01155 inline int32_t
01156 Unicode::charLength(UChar32 c) {
01157 return UTF_CHAR_LENGTH(c);
01158 }
01159
/**
 * Returns the result of the UTF_ARRAY_SIZE macro applied to size.
 * Presumably the number of UChar units to allocate for `size` code points
 * (name-based reading; confirm against the macro's definition).
 */
01160 inline int32_t
01161 Unicode::arraySize(int32_t size) {
01162 return UTF_ARRAY_SIZE(size);
01163 }
01164
01165
/**
 * Thin wrapper: returns u_islower(ch) unchanged.
 * The definition of "lowercase" is whatever the C API function implements.
 */
01166 inline UBool
01167 Unicode::isLowerCase(UChar32 ch) {
01168 return u_islower(ch);
01169 }
01170
01171
/**
 * Thin wrapper: returns u_isupper(ch) unchanged.
 * The definition of "uppercase" is whatever the C API function implements.
 */
01172 inline UBool
01173 Unicode::isUpperCase(UChar32 ch) {
01174 return u_isupper(ch);
01175 }
01176
01177
/**
 * Thin wrapper: returns u_istitle(ch) unchanged.
 * The definition of "titlecase" is whatever the C API function implements.
 */
01178 inline UBool
01179 Unicode::isTitleCase(UChar32 ch) {
01180 return u_istitle(ch);
01181 }
01182
01183
/**
 * Thin wrapper: returns u_isdigit(ch) unchanged.
 * Which characters count as digits is decided by the C API function.
 */
01184 inline UBool
01185 Unicode::isDigit(UChar32 ch) {
01186 return u_isdigit(ch);
01187 }
01188
01189
/**
 * Thin wrapper: returns u_isdefined(ch) unchanged.
 * Presumably true when ch is an assigned character
 * (name-based reading; confirm in unicode/uchar.h).
 */
01190 inline UBool
01191 Unicode::isDefined(UChar32 ch) {
01192 return u_isdefined(ch);
01193 }
01194
01195
/**
 * Thin wrapper: returns u_iscntrl(ch) unchanged.
 * Which characters count as control characters is decided by the C API.
 */
01196 inline UBool
01197 Unicode::isControl(UChar32 ch) {
01198 return u_iscntrl(ch);
01199 }
01200
01201
/**
 * Thin wrapper: returns u_isprint(ch) unchanged.
 * Which characters count as printable is decided by the C API function.
 */
01202 inline UBool
01203 Unicode::isPrintable(UChar32 ch) {
01204 return u_isprint(ch);
01205 }
01206
01207
/**
 * Thin wrapper: returns u_isbase(ch) unchanged.
 * Presumably true for base (non-combining) characters
 * (name-based reading; confirm in unicode/uchar.h).
 */
01208 inline UBool
01209 Unicode::isBaseForm(UChar32 ch) {
01210 return u_isbase(ch);
01211 }
01212
01213
/**
 * Thin wrapper: returns u_isalpha(ch) unchanged.
 * Note the naming difference: the C++ name says "letter", the C function
 * says "alpha"; the C function's definition is the one that applies.
 */
01214 inline UBool
01215 Unicode::isLetter(UChar32 ch) {
01216 return u_isalpha(ch);
01217 }
01218
01219
/**
 * Thin wrapper: returns u_isJavaIDStart(ch) unchanged.
 * Presumably tests whether ch may begin a Java identifier
 * (name-based reading; confirm in unicode/uchar.h).
 */
01220 inline UBool
01221 Unicode::isJavaIdentifierStart(UChar32 ch) {
01222 return u_isJavaIDStart(ch);
01223 }
01224
01225
01226
/**
 * Thin wrapper: returns u_isJavaIDPart(ch) unchanged.
 * Presumably tests whether ch may appear inside a Java identifier
 * (name-based reading; confirm in unicode/uchar.h).
 */
01227 inline UBool
01228 Unicode::isJavaIdentifierPart(UChar32 ch) {
01229 return u_isJavaIDPart(ch);
01230 }
01231
01232
/**
 * Thin wrapper: returns u_isIDStart(ch) unchanged.
 * Presumably tests whether ch may begin a Unicode identifier
 * (name-based reading; confirm in unicode/uchar.h).
 */
01233 inline UBool
01234 Unicode::isUnicodeIdentifierStart(UChar32 ch) {
01235 return u_isIDStart(ch);
01236 }
01237
01238
01239
/**
 * Thin wrapper: returns u_isIDPart(ch) unchanged.
 * Presumably tests whether ch may appear inside a Unicode identifier
 * (name-based reading; confirm in unicode/uchar.h).
 */
01240 inline UBool
01241 Unicode::isUnicodeIdentifierPart(UChar32 ch) {
01242 return u_isIDPart(ch);
01243 }
01244
01245
/**
 * Thin wrapper: returns u_isIDIgnorable(ch) unchanged.
 * Presumably tests whether ch should be ignored inside identifiers
 * (name-based reading; confirm in unicode/uchar.h).
 */
01246 inline UBool
01247 Unicode::isIdentifierIgnorable(UChar32 ch) {
01248 return u_isIDIgnorable(ch);
01249 }
01250
01251
/**
 * Thin wrapper: returns u_tolower(ch) unchanged.
 * Takes and returns a single UChar32.
 */
01252 inline UChar32
01253 Unicode::toLowerCase(UChar32 ch) {
01254 return u_tolower(ch);
01255 }
01256
01257
/**
 * Thin wrapper: returns u_toupper(ch) unchanged.
 * Takes and returns a single UChar32.
 */
01258 inline UChar32
01259 Unicode::toUpperCase(UChar32 ch) {
01260 return u_toupper(ch);
01261 }
01262
01263
/**
 * Thin wrapper: returns u_totitle(ch) unchanged.
 * Takes and returns a single UChar32.
 */
01264 inline UChar32
01265 Unicode::toTitleCase(UChar32 ch) {
01266 return u_totitle(ch);
01267 }
01268
01269
/**
 * Thin wrapper: returns u_foldCase(ch, options) unchanged.
 * `options` is handed to the C function as is; its accepted values are
 * defined by u_foldCase(), not by this wrapper.
 * The first parameter is named `c` in the class declaration and `ch` here;
 * the difference has no effect on callers.
 */
01270 inline UChar32
01271 Unicode::foldCase(UChar32 ch, uint32_t options) {
01272 return u_foldCase(ch, options);
01273 }
01274
01275
/**
 * Thin wrapper: returns u_isspace(ch) unchanged.
 * Not interchangeable with isWhitespace(), which forwards to a different
 * C function (u_isWhitespace).
 */
01276 inline UBool
01277 Unicode::isSpaceChar(UChar32 ch) {
01278 return u_isspace(ch);
01279 }
01280
01281
/**
 * Thin wrapper: returns u_isWhitespace(ch) unchanged.
 * Not interchangeable with isSpaceChar(), which forwards to a different
 * C function (u_isspace).
 */
01282 inline UBool
01283 Unicode::isWhitespace(UChar32 ch) {
01284 return u_isWhitespace(ch);
01285 }
01286
01287
/**
 * Thin wrapper: returns u_charType(ch), converted to int8_t by the return.
 * Presumably the value is one of the EUnicodeGeneralTypes constants
 * (confirm that they match the C API's category values).
 */
01288 inline int8_t
01289 Unicode::getType(UChar32 ch) {
01290 return u_charType(ch);
01291 }
01292
/**
 * Thin wrapper: returns u_getCombiningClass(c) unchanged, as uint8_t.
 */
01293 inline uint8_t
01294 Unicode::getCombiningClass(UChar32 c) {
01295 return u_getCombiningClass(c);
01296 }
01297
01298
/**
 * Returns u_charDirection(ch) converted to Unicode::EDirectionProperty.
 * The conversion is a plain cast with no range check, so it relies on the
 * C API's direction values matching the EDirectionProperty numbering
 * (NOTE(review): confirm the two enums stay in sync).
 */
01299 inline Unicode::EDirectionProperty
01300 Unicode::characterDirection(UChar32 ch) {
01301 return (EDirectionProperty)u_charDirection(ch);
01302 }
01303
01304
/**
 * Thin wrapper: returns u_isMirrored(ch) unchanged.
 * The parameter is named `c` in the class declaration and `ch` here.
 */
01305 inline UBool
01306 Unicode::isMirrored(UChar32 ch) {
01307 return u_isMirrored(ch);
01308 }
01309
01310
/**
 * Thin wrapper: returns u_charMirror(ch) unchanged.
 * The parameter is named `c` in the class declaration and `ch` here.
 */
01311 inline UChar32
01312 Unicode::charMirror(UChar32 ch) {
01313 return u_charMirror(ch);
01314 }
01315
01316
/**
 * Returns u_charScript(ch) converted to Unicode::EUnicodeScript.
 * The conversion is a plain cast with no range check, so it relies on the
 * C API's values matching the EUnicodeScript numbering, which starts at
 * UBLOCK_BASIC_LATIN (NOTE(review): confirm the two stay in sync).
 */
01317 inline Unicode::EUnicodeScript
01318 Unicode::getScript(UChar32 ch) {
01319 return (EUnicodeScript) u_charScript(ch);
01320 }
01321
01322
/**
 * Thin wrapper: returns u_charCellWidth(ch) unchanged, as uint16_t.
 * Presumably the value is one of the ECellWidths constants
 * (name-based reading; confirm in unicode/uchar.h).
 */
01323 inline uint16_t
01324 Unicode::getCellWidth(UChar32 ch) {
01325 return u_charCellWidth(ch);
01326 }
01327
/**
 * Retrieves a character name through u_charName(), hiding its error code.
 *
 * @param code         Code point whose name is requested.
 * @param buffer       Destination buffer handed to u_charName().
 * @param bufferLength Capacity of buffer, handed to u_charName().
 * @param nameChoice   Which kind of name to fetch; the class declaration
 *                     defaults it to U_UNICODE_CHAR_NAME.
 * @return The length reported by u_charName() when the call leaves the
 *         error code in a success state, otherwise 0.
 *
 * The error code is local to this function, so a caller cannot tell a
 * failure apart from a genuine zero length.
 */
01328 inline int32_t
01329 Unicode::getCharName(uint32_t code,
01330 char *buffer, int32_t bufferLength,
01331 UCharNameChoice nameChoice) {
// Start from a clean status; u_charName() reports problems through it.
01332 UErrorCode errorCode=U_ZERO_ERROR;
// Note the argument order: the C function takes nameChoice before buffer.
01333 int32_t length=u_charName(code, nameChoice, buffer, bufferLength, &errorCode);
// Any failure status is collapsed into a 0 return.
01334 return U_SUCCESS(errorCode) ? length : 0;
01335 }
01336
/**
 * Thin wrapper: returns u_charDigitValue(ch) unchanged.
 * The value returned for non-digit characters is decided by the C API.
 */
01337 inline int32_t
01338 Unicode::digitValue(UChar32 ch) {
01339 return u_charDigitValue(ch);
01340 }
01341
/**
 * Thin wrapper: returns u_digit(ch, radix) unchanged.
 * No radix validation happens here; presumably the accepted range is
 * MIN_RADIX..MAX_RADIX (confirm against u_digit()'s documentation).
 */
01342 inline int32_t
01343 Unicode::digit(UChar32 ch, int8_t radix) {
01344 return u_digit(ch, radix);
01345 }
01346
/**
 * Thin wrapper: returns u_forDigit(digit, radix) unchanged.
 * No argument validation happens here; presumably the accepted radix range
 * is MIN_RADIX..MAX_RADIX (confirm against u_forDigit()'s documentation).
 */
01347 inline UChar32
01348 Unicode::forDigit(int32_t digit, int8_t radix) {
01349 return u_forDigit(digit, radix);
01350 }
01351
/**
 * Passes versionArray straight to u_getUnicodeVersion() and returns nothing.
 * Presumably the C function writes the Unicode version into versionArray
 * (name-based reading; confirm in unicode/uchar.h).
 * The class declaration omits `inline` and names the parameter `info`;
 * this definition is marked inline.
 */
01352 inline void
01353 Unicode::getUnicodeVersion(UVersionInfo versionArray) {
01354 u_getUnicodeVersion(versionArray);
01355 }
01356 U_NAMESPACE_END
01357 #else
01358
01359 #error "The unicode/unicode.h header is obsolete. Please use the Unicode C API in unicode/uchar.h instead."
01360
01361 #endif
01362
01363 #endif