Main Page   Class Hierarchy   Alphabetical List   Compound List   File List   Compound Members   File Members  

utf.h

Go to the documentation of this file.
00001 /*
00002 *******************************************************************************
00003 *
00004 *   Copyright (C) 1999-2001, International Business Machines
00005 *   Corporation and others.  All Rights Reserved.
00006 *
00007 *******************************************************************************
00008 *   file name:  utf.h
00009 *   encoding:   US-ASCII
00010 *   tab size:   8 (not used)
00011 *   indentation:4
00012 *
00013 *   created on: 1999sep09
00014 *   created by: Markus W. Scherer
00015 */
00016 
00083 #ifndef __UTF_H__
00084 #define __UTF_H__
00085 
00086 /*
00087  * ANSI C headers:
00088  * stddef.h defines wchar_t
00089  */
00090 #include <stddef.h>
00091 #include "unicode/umachine.h"
00092 /* include the utfXX.h after the following definitions */
00093 
/*
 * UTF_SIZE is the width, in bits, of the default code unit.
 * It may be preset by a compiler option; when it is not, 16 (UTF-16) is used.
 * The selection chain further down in this file stops the build with #error
 * for every value other than 16.
 */
00094 /* If there is no compiler option for the preferred UTF size, then default to UTF-16. */
00095 #ifndef UTF_SIZE
00096 
00097 #   define UTF_SIZE 16
00098 #endif
00099 
/* Size in bytes of one UTF_SIZE-bit code unit: UTF_SIZE divided by 8 (2 for the default of 16). */
00101 #define U_SIZEOF_UCHAR (UTF_SIZE>>3)
00102 
/*
 * Default U_HAVE_WCHAR_H to 1 unless it has already been defined.
 * NOTE(review): presumably this flags that <wchar.h> is available; nothing
 * visible in this listing reads it, so confirm the meaning against the
 * platform configuration headers.
 */
00107 #ifndef U_HAVE_WCHAR_H
00108 #   define U_HAVE_WCHAR_H 1
00109 #endif
00110 
/*
 * Fall back to 4 bytes when the size of wchar_t is unknown.
 * An identifier that is not defined as a macro evaluates as 0 inside #if, so
 * this test also covers the case where U_SIZEOF_WCHAR_T was never defined.
 * NOTE(review): the fallback feeds the UChar32 typedef in this file; if wchar_t
 * is really narrower than 4 bytes on a platform that left this unset, UChar32
 * would be too small - confirm that every such platform defines
 * U_SIZEOF_WCHAR_T before this header is read.
 */
00111 /* U_SIZEOF_WCHAR_T==sizeof(wchar_t) (0 means it is not defined or autoconf could not set it) */
00112 #if U_SIZEOF_WCHAR_T==0
00113 #   undef U_SIZEOF_WCHAR_T
00114 
00115 #   define U_SIZEOF_WCHAR_T 4
00116 #endif
00117 
/*
 * UChar32: 32-bit type for a single code point.
 * wchar_t is reused when U_SIZEOF_WCHAR_T is 4, otherwise uint32_t is used.
 * The signedness of wchar_t is implementation-defined, so UChar32 may be
 * signed or unsigned depending on the platform; note that the range tests in
 * this file cast to uint32_t before comparing.
 */
00123 #if U_SIZEOF_WCHAR_T==4
00124     typedef wchar_t UChar32;
00125 #else
00126     typedef uint32_t UChar32;
00127 #endif
00128 
/* UTextOffset: signed 32-bit integer type; presumably used for offsets into text (TODO confirm at call sites). */
00133 typedef int32_t UTextOffset;
00134 
/*
 * Pick the default flavor of the generic UTF_xxx macros.
 * When none of UTF_SAFE, UTF_STRICT or UTF_UNSAFE is predefined, UTF_SAFE is
 * selected; the #ifdef chain at the end of this file maps the generic macro
 * names to the matching variants.
 */
00135 /* Specify which macro versions are the default ones - safe or fast. */
00136 #if !defined(UTF_SAFE) && !defined(UTF_STRICT) && !defined(UTF_UNSAFE)
00137 
00141 #   define UTF_SAFE
00142 #endif
00143 
00144 /* internal definitions ----------------------------------------------------- */
00145 
/*
 * Two values that UTF_IS_ERROR() reports as errors and UTF_IS_VALID() rejects.
 * NOTE(review): presumably these are the results the UTF-8 macros produce for
 * malformed input; confirm in unicode/utf8.h.
 */
00158 #define UTF8_ERROR_VALUE_1 0x15
00159 #define UTF8_ERROR_VALUE_2 0x9f
00160 
/*
 * Error value 0xffff; it satisfies the ((c)&0xfffe)==0xfffe test in UTF_IS_ERROR().
 * NOTE(review): presumably the result of the decoding macros for bad input -
 * confirm in unicode/utf16.h.
 */
00165 #define UTF_ERROR_VALUE 0xffff
00166 
00167 /* single-code point definitions -------------------------------------------- */
00168 
/*
 * True if uchar is a surrogate, i.e. lies in 0xd800..0xdfff.
 * The mask clears the low 11 bits, so only that 0x800-wide range compares
 * equal to 0xd800. The argument is expanded once.
 */
00170 #define UTF_IS_SURROGATE(uchar) (((uchar)&0xfffff800)==0xd800)
00171 
/*
 * True if c is at most 0x10ffff, is not a surrogate, and its low 16 bits are
 * neither 0xfffe nor 0xffff.
 * The uint32_t cast makes negative values fail the range test.
 * c is expanded three times; pass an expression without side effects.
 */
00176 #define UTF_IS_UNICODE_CHAR(c) \
00177     ((uint32_t)(c)<=0x10ffff && \
00178      !UTF_IS_SURROGATE(c) && ((c)&0xfffe)!=0xfffe)
00179 
/*
 * True if the low 16 bits of c are 0xfffe or 0xffff (this includes
 * UTF_ERROR_VALUE), or if c equals UTF8_ERROR_VALUE_1 or UTF8_ERROR_VALUE_2.
 * c is expanded three times; pass an expression without side effects.
 */
00184 #define UTF_IS_ERROR(c) \
00185     (((c)&0xfffe)==0xfffe || (c)==UTF8_ERROR_VALUE_1 || (c)==UTF8_ERROR_VALUE_2)
00186 
/*
 * True if c passes the same three tests as UTF_IS_UNICODE_CHAR() (at most
 * 0x10ffff, not a surrogate, low 16 bits not 0xfffe/0xffff) and additionally
 * differs from both UTF8_ERROR_VALUE_1 and UTF8_ERROR_VALUE_2.
 * c is expanded five times; pass an expression without side effects.
 */
00188 #define UTF_IS_VALID(c) \
00189     ((uint32_t)(c)<=0x10ffff && \
00190      !UTF_IS_SURROGATE(c) && \
00191      ((c)&0xfffe)!=0xfffe && \
00192      (c)!=UTF8_ERROR_VALUE_1 && (c)!=UTF8_ERROR_VALUE_2)
00193 
00194 /* include the utfXX.h ------------------------------------------------------ */
00195 
00196 #include "unicode/utf8.h"
00197 #include "unicode/utf16.h"
00198 #include "unicode/utf32.h"
00199 
00200 /* Define types and macros according to the selected UTF size. -------------- */
00201 
/*
 * Select the UChar code unit type and, for UTF-16, the generic per-encoding
 * macros according to UTF_SIZE.
 * Only UTF_SIZE==16 compiles: the 8 and 32 branches and the final #else each
 * stop the build with #error, so the typedefs in the 8 and 32 branches only
 * record the intended definitions.
 */
00208 #if UTF_SIZE==8
00209 
00210 #   error UTF-8 is not implemented, undefine UTF_SIZE or define it to 16
00211 
00212 /*
00213  * ANSI C header:
00214  * limits.h defines CHAR_MAX
00215  */
00216 #   include <limits.h>
00217 
/* CHAR_MAX>=255 means plain char can hold every 8-bit value, so it can serve as UChar. */
00218     /* Define UChar to be compatible with char if possible. */
00219 #   if CHAR_MAX>=255
00220         typedef char UChar;
00221 #   else
00222         typedef uint8_t UChar;
00223 #   endif
00224 
00225 #elif UTF_SIZE==16
00226 
00227     /* Define UChar to be compatible with wchar_t if possible. */
00228 #   if U_SIZEOF_WCHAR_T==2
00229         typedef wchar_t UChar;
00230 #   else
00231         typedef uint16_t UChar;
00232 #   endif
00233 
/*
 * Each generic UTF_ macro below forwards its arguments, unchanged and in the
 * same order, to the UTF16_ macro of the same name (presumably defined in
 * unicode/utf16.h, which this file includes before this point).
 * Compared with the _UNSAFE variants, the _SAFE variants take additional
 * start and/or length arguments; the _SAFE GET_CHAR, NEXT_CHAR and PREV_CHAR
 * macros also take a strict argument.
 */
00235 #   define UTF_IS_SINGLE(uchar)                         UTF16_IS_SINGLE(uchar)
00236 
00237 #   define UTF_IS_LEAD(uchar)                           UTF16_IS_LEAD(uchar)
00238 
00239 #   define UTF_IS_TRAIL(uchar)                          UTF16_IS_TRAIL(uchar)
00240 
00242 #   define UTF_NEED_MULTIPLE_UCHAR(c)                   UTF16_NEED_MULTIPLE_UCHAR(c)
00243 
00244 #   define UTF_CHAR_LENGTH(c)                           UTF16_CHAR_LENGTH(c)
00245 
00246 #   define UTF_MAX_CHAR_LENGTH                          UTF16_MAX_CHAR_LENGTH
00247 
00248 #   define UTF_ARRAY_SIZE(size)                         UTF16_ARRAY_SIZE(size)
00249 
00251 #   define UTF_GET_CHAR_UNSAFE(s, i, c)                 UTF16_GET_CHAR_UNSAFE(s, i, c)
00252 
00253 #   define UTF_GET_CHAR_SAFE(s, start, i, length, c, strict) UTF16_GET_CHAR_SAFE(s, start, i, length, c, strict)
00254 
00256 #   define UTF_NEXT_CHAR_UNSAFE(s, i, c)                UTF16_NEXT_CHAR_UNSAFE(s, i, c)
00257 
00258 #   define UTF_NEXT_CHAR_SAFE(s, i, length, c, strict)  UTF16_NEXT_CHAR_SAFE(s, i, length, c, strict)
00259 
00261 #   define UTF_APPEND_CHAR_UNSAFE(s, i, c)              UTF16_APPEND_CHAR_UNSAFE(s, i, c)
00262 
00263 #   define UTF_APPEND_CHAR_SAFE(s, i, length, c)        UTF16_APPEND_CHAR_SAFE(s, i, length, c)
00264 
00266 #   define UTF_FWD_1_UNSAFE(s, i)                       UTF16_FWD_1_UNSAFE(s, i)
00267 
00268 #   define UTF_FWD_1_SAFE(s, i, length)                 UTF16_FWD_1_SAFE(s, i, length)
00269 
00271 #   define UTF_FWD_N_UNSAFE(s, i, n)                    UTF16_FWD_N_UNSAFE(s, i, n)
00272 
00273 #   define UTF_FWD_N_SAFE(s, i, length, n)              UTF16_FWD_N_SAFE(s, i, length, n)
00274 
00276 #   define UTF_SET_CHAR_START_UNSAFE(s, i)              UTF16_SET_CHAR_START_UNSAFE(s, i)
00277 
00278 #   define UTF_SET_CHAR_START_SAFE(s, start, i)         UTF16_SET_CHAR_START_SAFE(s, start, i)
00279 
00281 #   define UTF_PREV_CHAR_UNSAFE(s, i, c)                UTF16_PREV_CHAR_UNSAFE(s, i, c)
00282 
00283 #   define UTF_PREV_CHAR_SAFE(s, start, i, c, strict)   UTF16_PREV_CHAR_SAFE(s, start, i, c, strict)
00284 
00286 #   define UTF_BACK_1_UNSAFE(s, i)                      UTF16_BACK_1_UNSAFE(s, i)
00287 
00288 #   define UTF_BACK_1_SAFE(s, start, i)                 UTF16_BACK_1_SAFE(s, start, i)
00289 
00291 #   define UTF_BACK_N_UNSAFE(s, i, n)                   UTF16_BACK_N_UNSAFE(s, i, n)
00292 
00293 #   define UTF_BACK_N_SAFE(s, start, i, n)              UTF16_BACK_N_SAFE(s, start, i, n)
00294 
00296 #   define UTF_SET_CHAR_LIMIT_UNSAFE(s, i)              UTF16_SET_CHAR_LIMIT_UNSAFE(s, i)
00297 
00298 #   define UTF_SET_CHAR_LIMIT_SAFE(s, start, i, length) UTF16_SET_CHAR_LIMIT_SAFE(s, start, i, length)
00299 
00300 #elif UTF_SIZE==32
00301 
00302 #   error UTF-32 is not implemented, undefine UTF_SIZE or define it to 16
00303 
/* With 32-bit code units a code unit and a code point share one type. */
00304     typedef UChar32 UChar;
00305 
00306 #else
00307 #   error UTF_SIZE must be undefined or one of { 8, 16, 32 } - only 16 is implemented
00308 #endif
00309 
00310 /* Define the default macros for handling UTF characters. ------------------- */
00311 
/*
 * Generic macros with one uniform argument list per operation, so calling code
 * reads the same whichever flavor is selected.
 *   UTF_SAFE:   map to the _SAFE variants, passing FALSE for strict.
 *   UTF_STRICT: map to the same _SAFE variants, passing TRUE for strict.
 *   otherwise:  map to the _UNSAFE variants; the start and length arguments
 *               are dropped from the expansion and are therefore never
 *               evaluated.
 * UTF_SAFE is tested first, so it wins if more than one of the three is
 * defined. Only the macros that take a strict argument (UTF_GET_CHAR,
 * UTF_NEXT_CHAR, UTF_PREV_CHAR) differ between UTF_SAFE and UTF_STRICT.
 * NOTE(review): TRUE and FALSE are not defined in this file; presumably they
 * come from unicode/umachine.h - confirm.
 */
00419 #ifdef UTF_SAFE
00420 
00421 #   define UTF_GET_CHAR(s, start, i, length, c) UTF_GET_CHAR_SAFE(s, start, i, length, c, FALSE)
00422 
00423 #   define UTF_NEXT_CHAR(s, i, length, c)       UTF_NEXT_CHAR_SAFE(s, i, length, c, FALSE)
00424 #   define UTF_APPEND_CHAR(s, i, length, c)     UTF_APPEND_CHAR_SAFE(s, i, length, c)
00425 #   define UTF_FWD_1(s, i, length)              UTF_FWD_1_SAFE(s, i, length)
00426 #   define UTF_FWD_N(s, i, length, n)           UTF_FWD_N_SAFE(s, i, length, n)
00427 #   define UTF_SET_CHAR_START(s, start, i)      UTF_SET_CHAR_START_SAFE(s, start, i)
00428 
00429 #   define UTF_PREV_CHAR(s, start, i, c)        UTF_PREV_CHAR_SAFE(s, start, i, c, FALSE)
00430 #   define UTF_BACK_1(s, start, i)              UTF_BACK_1_SAFE(s, start, i)
00431 #   define UTF_BACK_N(s, start, i, n)           UTF_BACK_N_SAFE(s, start, i, n)
00432 #   define UTF_SET_CHAR_LIMIT(s, start, i, length) UTF_SET_CHAR_LIMIT_SAFE(s, start, i, length)
00433 
00434 #elif defined(UTF_STRICT)
00435 
00436 #   define UTF_GET_CHAR(s, start, i, length, c) UTF_GET_CHAR_SAFE(s, start, i, length, c, TRUE)
00437 
00438 #   define UTF_NEXT_CHAR(s, i, length, c)       UTF_NEXT_CHAR_SAFE(s, i, length, c, TRUE)
00439 #   define UTF_APPEND_CHAR(s, i, length, c)     UTF_APPEND_CHAR_SAFE(s, i, length, c)
00440 #   define UTF_FWD_1(s, i, length)              UTF_FWD_1_SAFE(s, i, length)
00441 #   define UTF_FWD_N(s, i, length, n)           UTF_FWD_N_SAFE(s, i, length, n)
00442 #   define UTF_SET_CHAR_START(s, start, i)      UTF_SET_CHAR_START_SAFE(s, start, i)
00443 
00444 #   define UTF_PREV_CHAR(s, start, i, c)        UTF_PREV_CHAR_SAFE(s, start, i, c, TRUE)
00445 #   define UTF_BACK_1(s, start, i)              UTF_BACK_1_SAFE(s, start, i)
00446 #   define UTF_BACK_N(s, start, i, n)           UTF_BACK_N_SAFE(s, start, i, n)
00447 #   define UTF_SET_CHAR_LIMIT(s, start, i, length) UTF_SET_CHAR_LIMIT_SAFE(s, start, i, length)
00448 
00449 #else /* UTF_UNSAFE */
00450 
00451 #   define UTF_GET_CHAR(s, start, i, length, c) UTF_GET_CHAR_UNSAFE(s, i, c)
00452 
00453 #   define UTF_NEXT_CHAR(s, i, length, c)       UTF_NEXT_CHAR_UNSAFE(s, i, c)
00454 #   define UTF_APPEND_CHAR(s, i, length, c)     UTF_APPEND_CHAR_UNSAFE(s, i, c)
00455 #   define UTF_FWD_1(s, i, length)              UTF_FWD_1_UNSAFE(s, i)
00456 #   define UTF_FWD_N(s, i, length, n)           UTF_FWD_N_UNSAFE(s, i, n)
00457 #   define UTF_SET_CHAR_START(s, start, i)      UTF_SET_CHAR_START_UNSAFE(s, i)
00458 
00459 #   define UTF_PREV_CHAR(s, start, i, c)        UTF_PREV_CHAR_UNSAFE(s, i, c)
00460 #   define UTF_BACK_1(s, start, i)              UTF_BACK_1_UNSAFE(s, i)
00461 #   define UTF_BACK_N(s, start, i, n)           UTF_BACK_N_UNSAFE(s, i, n)
00462 #   define UTF_SET_CHAR_LIMIT(s, start, i, length) UTF_SET_CHAR_LIMIT_UNSAFE(s, i)
00463 
00464 #endif
00465 
00466 #endif

Generated at Tue Jun 12 14:04:03 2001 for ICU 1.8.1 by doxygen 1.2.3, written by Dimitri van Heesch, © 1997-2000