00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00087 #ifndef __UTF_H__
00088 #define __UTF_H__
00089
00090
00091
00092
00093
00094 #include <stddef.h>
00095 #include "unicode/umachine.h"
00096
00097
00098
/**
 * Number of bits per code unit selected for the generic UTF_* macros and the
 * UChar type. A value defined before this point is respected; otherwise the
 * default is 16.
 */
#ifndef UTF_SIZE
# define UTF_SIZE 16
#endif

/** Size of one code unit in bytes: UTF_SIZE (a bit count) divided by 8. */
#define U_SIZEOF_UCHAR (UTF_SIZE>>3)
00106
/**
 * Defaults to 1 unless it was already defined before this point.
 * NOTE(review): by its name this flags the availability of <wchar.h>;
 * nothing in this header reads it - confirm against the platform headers.
 */
#ifndef U_HAVE_WCHAR_H
# define U_HAVE_WCHAR_H 1
#endif
00114
00115
/**
 * Size of wchar_t in bytes as assumed by this header.
 *
 * An undefined macro evaluates as 0 inside #if, so this branch is taken both
 * when U_SIZEOF_WCHAR_T was never defined and when it was defined as 0. In
 * either case it is (re)defined as 4.
 */
#if U_SIZEOF_WCHAR_T==0
# undef U_SIZEOF_WCHAR_T
# define U_SIZEOF_WCHAR_T 4
#endif
00121
/**
 * Type used for a single code point.
 *
 * It is wchar_t when U_SIZEOF_WCHAR_T says wchar_t is 4 bytes wide, and
 * uint32_t otherwise.
 */
#if U_SIZEOF_WCHAR_T==4
typedef wchar_t UChar32;
#else
typedef uint32_t UChar32;
#endif
00132
/** Signed 32-bit integer type; by its name, used for offsets into text. */
typedef int32_t UTextOffset;
00138
00139
/*
 * Choose the default flavor for the mode-independent UTF_* macros.
 * If none of UTF_SAFE, UTF_STRICT or UTF_UNSAFE was requested, UTF_SAFE is
 * defined (with an empty replacement list).
 */
#if !defined(UTF_SAFE) && !defined(UTF_STRICT) && !defined(UTF_UNSAFE)
# define UTF_SAFE
#endif
00147
00148
00149
/**
 * Error sentinel values.
 * UTF_IS_ERROR() reports both UTF8_ERROR_VALUE_* constants as errors and
 * UTF_IS_VALID() rejects them.
 * NOTE(review): by their names the two UTF8_* values are produced by the
 * UTF-8 macros - confirm against unicode/utf8.h.
 */
#define UTF8_ERROR_VALUE_1 0x15
#define UTF8_ERROR_VALUE_2 0x9f

/** General error sentinel (0xffff). */
#define UTF_ERROR_VALUE 0xffff
00170
00171
00172
/**
 * True if uchar lies in the surrogate range 0xd800..0xdfff.
 * The low 11 bits are masked off and the remainder is compared with 0xd800.
 * The argument is evaluated exactly once.
 */
#define UTF_IS_SURROGATE(uchar) (((uchar)&0xfffff800)==0xd800)
00175
/**
 * True if c is a code point that stands for a Unicode character, i.e.
 *   - it is below 0xd800, or
 *   - it is above 0xdfff (not a surrogate), at most 0x10ffff, its low 16 bits
 *     are neither 0xfffe nor 0xffff, and it is outside 0xfdd0..0xfdef.
 *
 * c is evaluated several times; do not pass an expression with side effects.
 */
#define UTF_IS_UNICODE_CHAR(c) \
    ((uint32_t)(c)<0xd800 || \
        ((uint32_t)(c)>0xdfff && \
         (uint32_t)(c)<=0x10ffff && \
         ((c)&0xfffe)!=0xfffe && \
         !(0xfdd0<=(uint32_t)(c) && (uint32_t)(c)<=0xfdef)))

/**
 * True if c is one of the values treated as an error result: any value whose
 * low 16 bits are 0xfffe or 0xffff, or one of the two UTF8_ERROR_VALUE_*
 * constants. c is evaluated several times.
 */
#define UTF_IS_ERROR(c) \
    (((c)&0xfffe)==0xfffe || (c)==UTF8_ERROR_VALUE_1 || (c)==UTF8_ERROR_VALUE_2)

/**
 * True if c passes UTF_IS_UNICODE_CHAR() and is not one of the two
 * UTF8_ERROR_VALUE_* constants. c is evaluated several times.
 */
#define UTF_IS_VALID(c) \
    (UTF_IS_UNICODE_CHAR(c) && \
     (c)!=UTF8_ERROR_VALUE_1 && (c)!=UTF8_ERROR_VALUE_2)
00207
00208
00209
00210 #include "unicode/utf8.h"
00211 #include "unicode/utf16.h"
00212 #include "unicode/utf32.h"
00213
00214
00215
/*
 * Select the code unit type UChar and bind the generic UTF_* macros to the
 * implementation that matches UTF_SIZE. Only the 16-bit variant is
 * implemented; 8 and 32 stop the build with #error, as does any other value.
 */

/* Fall back to 16-bit code units if no definition of UTF_SIZE is visible here. */
#ifndef UTF_SIZE
# define UTF_SIZE 16
#endif

#if UTF_SIZE==8

# error UTF-8 is not implemented, undefine UTF_SIZE or define it to 16

/* Everything below in this branch cannot be part of a successful build
   because of the #error above. */
# include <limits.h>

/* Plain char is used when CHAR_MAX shows it can hold values up to 255;
   otherwise an explicit unsigned 8-bit type is used. */
# if CHAR_MAX>=255
typedef char UChar;
# else
typedef uint8_t UChar;
# endif

#elif UTF_SIZE==16

/* wchar_t serves as the code unit type when it is 2 bytes wide;
   otherwise uint16_t is used. */
# if U_SIZEOF_WCHAR_T==2
typedef wchar_t UChar;
# else
typedef uint16_t UChar;
# endif

/*
 * Every generic macro below forwards its arguments, unchanged and in the
 * same order, to the UTF16_ macro of the same name.
 */

/* Classification of a single code unit. */
# define UTF_IS_SINGLE(uchar) UTF16_IS_SINGLE(uchar)
# define UTF_IS_LEAD(uchar) UTF16_IS_LEAD(uchar)
# define UTF_IS_TRAIL(uchar) UTF16_IS_TRAIL(uchar)

/* Code unit counts. */
# define UTF_NEED_MULTIPLE_UCHAR(c) UTF16_NEED_MULTIPLE_UCHAR(c)
# define UTF_CHAR_LENGTH(c) UTF16_CHAR_LENGTH(c)
# define UTF_MAX_CHAR_LENGTH UTF16_MAX_CHAR_LENGTH
# define UTF_ARRAY_SIZE(size) UTF16_ARRAY_SIZE(size)

/* GET_CHAR */
# define UTF_GET_CHAR_UNSAFE(s, i, c) UTF16_GET_CHAR_UNSAFE(s, i, c)
# define UTF_GET_CHAR_SAFE(s, start, i, length, c, strict) UTF16_GET_CHAR_SAFE(s, start, i, length, c, strict)

/* NEXT_CHAR */
# define UTF_NEXT_CHAR_UNSAFE(s, i, c) UTF16_NEXT_CHAR_UNSAFE(s, i, c)
# define UTF_NEXT_CHAR_SAFE(s, i, length, c, strict) UTF16_NEXT_CHAR_SAFE(s, i, length, c, strict)

/* APPEND_CHAR */
# define UTF_APPEND_CHAR_UNSAFE(s, i, c) UTF16_APPEND_CHAR_UNSAFE(s, i, c)
# define UTF_APPEND_CHAR_SAFE(s, i, length, c) UTF16_APPEND_CHAR_SAFE(s, i, length, c)

/* FWD_1 */
# define UTF_FWD_1_UNSAFE(s, i) UTF16_FWD_1_UNSAFE(s, i)
# define UTF_FWD_1_SAFE(s, i, length) UTF16_FWD_1_SAFE(s, i, length)

/* FWD_N */
# define UTF_FWD_N_UNSAFE(s, i, n) UTF16_FWD_N_UNSAFE(s, i, n)
# define UTF_FWD_N_SAFE(s, i, length, n) UTF16_FWD_N_SAFE(s, i, length, n)

/* SET_CHAR_START */
# define UTF_SET_CHAR_START_UNSAFE(s, i) UTF16_SET_CHAR_START_UNSAFE(s, i)
# define UTF_SET_CHAR_START_SAFE(s, start, i) UTF16_SET_CHAR_START_SAFE(s, start, i)

/* PREV_CHAR */
# define UTF_PREV_CHAR_UNSAFE(s, i, c) UTF16_PREV_CHAR_UNSAFE(s, i, c)
# define UTF_PREV_CHAR_SAFE(s, start, i, c, strict) UTF16_PREV_CHAR_SAFE(s, start, i, c, strict)

/* BACK_1 */
# define UTF_BACK_1_UNSAFE(s, i) UTF16_BACK_1_UNSAFE(s, i)
# define UTF_BACK_1_SAFE(s, start, i) UTF16_BACK_1_SAFE(s, start, i)

/* BACK_N */
# define UTF_BACK_N_UNSAFE(s, i, n) UTF16_BACK_N_UNSAFE(s, i, n)
# define UTF_BACK_N_SAFE(s, start, i, n) UTF16_BACK_N_SAFE(s, start, i, n)

/* SET_CHAR_LIMIT */
# define UTF_SET_CHAR_LIMIT_UNSAFE(s, i) UTF16_SET_CHAR_LIMIT_UNSAFE(s, i)
# define UTF_SET_CHAR_LIMIT_SAFE(s, start, i, length) UTF16_SET_CHAR_LIMIT_SAFE(s, start, i, length)

#elif UTF_SIZE==32

# error UTF-32 is not implemented, undefine UTF_SIZE or define it to 16

/* Cannot be part of a successful build because of the #error above. */
typedef UChar32 UChar;

#else
# error UTF_SIZE must be undefined or one of { 8, 16, 32 } - only 16 is implemented
#endif
00323
00324
00325
/*
 * Mode-independent macros. Each UTF_xxx name maps to UTF_xxx_SAFE or
 * UTF_xxx_UNSAFE depending on which of UTF_SAFE / UTF_STRICT is defined.
 * All three branches expose the same parameter lists, so calling code does
 * not change with the mode.
 */
#ifdef UTF_SAFE

/* Safe flavor: the _SAFE implementations, with the strict argument FALSE
   for the macros that take one (GET_CHAR, NEXT_CHAR, PREV_CHAR). */
# define UTF_GET_CHAR(s, start, i, length, c) UTF_GET_CHAR_SAFE(s, start, i, length, c, FALSE)

# define UTF_NEXT_CHAR(s, i, length, c) UTF_NEXT_CHAR_SAFE(s, i, length, c, FALSE)
# define UTF_APPEND_CHAR(s, i, length, c) UTF_APPEND_CHAR_SAFE(s, i, length, c)
# define UTF_FWD_1(s, i, length) UTF_FWD_1_SAFE(s, i, length)
# define UTF_FWD_N(s, i, length, n) UTF_FWD_N_SAFE(s, i, length, n)
# define UTF_SET_CHAR_START(s, start, i) UTF_SET_CHAR_START_SAFE(s, start, i)

# define UTF_PREV_CHAR(s, start, i, c) UTF_PREV_CHAR_SAFE(s, start, i, c, FALSE)
# define UTF_BACK_1(s, start, i) UTF_BACK_1_SAFE(s, start, i)
# define UTF_BACK_N(s, start, i, n) UTF_BACK_N_SAFE(s, start, i, n)
# define UTF_SET_CHAR_LIMIT(s, start, i, length) UTF_SET_CHAR_LIMIT_SAFE(s, start, i, length)

#elif defined(UTF_STRICT)

/* Strict flavor: identical to the safe flavor except that the strict
   argument is TRUE. */
# define UTF_GET_CHAR(s, start, i, length, c) UTF_GET_CHAR_SAFE(s, start, i, length, c, TRUE)

# define UTF_NEXT_CHAR(s, i, length, c) UTF_NEXT_CHAR_SAFE(s, i, length, c, TRUE)
# define UTF_APPEND_CHAR(s, i, length, c) UTF_APPEND_CHAR_SAFE(s, i, length, c)
# define UTF_FWD_1(s, i, length) UTF_FWD_1_SAFE(s, i, length)
# define UTF_FWD_N(s, i, length, n) UTF_FWD_N_SAFE(s, i, length, n)
# define UTF_SET_CHAR_START(s, start, i) UTF_SET_CHAR_START_SAFE(s, start, i)

# define UTF_PREV_CHAR(s, start, i, c) UTF_PREV_CHAR_SAFE(s, start, i, c, TRUE)
# define UTF_BACK_1(s, start, i) UTF_BACK_1_SAFE(s, start, i)
# define UTF_BACK_N(s, start, i, n) UTF_BACK_N_SAFE(s, start, i, n)
# define UTF_SET_CHAR_LIMIT(s, start, i, length) UTF_SET_CHAR_LIMIT_SAFE(s, start, i, length)

#else

/* Unsafe flavor: the _UNSAFE implementations. The start and length
   arguments are accepted for source compatibility but are not passed on. */
# define UTF_GET_CHAR(s, start, i, length, c) UTF_GET_CHAR_UNSAFE(s, i, c)

# define UTF_NEXT_CHAR(s, i, length, c) UTF_NEXT_CHAR_UNSAFE(s, i, c)
# define UTF_APPEND_CHAR(s, i, length, c) UTF_APPEND_CHAR_UNSAFE(s, i, c)
# define UTF_FWD_1(s, i, length) UTF_FWD_1_UNSAFE(s, i)
# define UTF_FWD_N(s, i, length, n) UTF_FWD_N_UNSAFE(s, i, n)
# define UTF_SET_CHAR_START(s, start, i) UTF_SET_CHAR_START_UNSAFE(s, i)

# define UTF_PREV_CHAR(s, start, i, c) UTF_PREV_CHAR_UNSAFE(s, i, c)
# define UTF_BACK_1(s, start, i) UTF_BACK_1_UNSAFE(s, i)
# define UTF_BACK_N(s, start, i, n) UTF_BACK_N_UNSAFE(s, i, n)
# define UTF_SET_CHAR_LIMIT(s, start, i, length) UTF_SET_CHAR_LIMIT_UNSAFE(s, i)

#endif
00479
00480 #endif