Main Page   Class Hierarchy   Alphabetical List   Compound List   File List   Compound Members   File Members   Search  

utf.h

Go to the documentation of this file.
00001 /*
00002 *******************************************************************************
00003 *
00004 *   Copyright (C) 1999-2001, International Business Machines
00005 *   Corporation and others.  All Rights Reserved.
00006 *
00007 *******************************************************************************
00008 *   file name:  utf.h
00009 *   encoding:   US-ASCII
00010 *   tab size:   8 (not used)
00011 *   indentation:4
00012 *
00013 *   created on: 1999sep09
00014 *   created by: Markus W. Scherer
00015 */
00016 
00087 #ifndef __UTF_H__
00088 #define __UTF_H__
00089 
00090 /*
00091  * ANSI C headers:
00092  * stddef.h defines wchar_t
00093  */
00094 #include <stddef.h>
00095 #include "unicode/umachine.h"
00096 /* include the utfXX.h after the following definitions */
00097 
00098 /* If there is no compiler option for the preferred UTF size, then default to UTF-16. */
00099 #ifndef UTF_SIZE
00100 /* UTF_SIZE is the code unit width in bits; the selection further down tests for 8, 16 and 32, but only 16 gets past #error. */
00101 #   define UTF_SIZE 16
00102 #endif
00103 /* Size of one code unit in bytes: UTF_SIZE is a bit count, so shifting it right by 3 divides it by 8. */
00105 #define U_SIZEOF_UCHAR (UTF_SIZE>>3)
00106 /* Defaults U_HAVE_WCHAR_H to 1 when the build has not set it; presumably it flags that <wchar.h> is available - confirm against the platform configuration. */
00111 #ifndef U_HAVE_WCHAR_H
00112 #   define U_HAVE_WCHAR_H 1
00113 #endif
00114 
00115 /* U_SIZEOF_WCHAR_T==sizeof(wchar_t) (0 means it is not defined or autoconf could not set it) */
00116 #if U_SIZEOF_WCHAR_T==0
00117 #   undef U_SIZEOF_WCHAR_T
00118 /* Fallback when the size is unknown: assume 4 bytes. NOTE(review): nothing here checks the real sizeof(wchar_t), so the build must set U_SIZEOF_WCHAR_T itself wherever wchar_t is narrower. */
00119 #   define U_SIZEOF_WCHAR_T 4
00120 #endif
00121 /* UChar32 aliases wchar_t when U_SIZEOF_WCHAR_T is 4 and uint32_t otherwise; the UTF_SIZE==32 branch below uses it as UChar. NOTE(review): in the wchar_t case its signedness follows the platform. */
00127 #if U_SIZEOF_WCHAR_T==4
00128     typedef wchar_t UChar32;
00129 #else
00130     typedef uint32_t UChar32;
00131 #endif
00132 /* Signed 32-bit integer type; judging by its name it is meant for offsets/indexes into text - confirm against its users. */
00137 typedef int32_t UTextOffset;
00138 
00139 /* Specify which macro versions are the default ones - safe or fast. */
00140 #if !defined(UTF_SAFE) && !defined(UTF_STRICT) && !defined(UTF_UNSAFE)
00141 /* None of the three modes was requested, so select UTF_SAFE; the mode decides which _SAFE or _UNSAFE variants the default UTF_ macros at the end of this file expand to. */
00145 #   define UTF_SAFE
00146 #endif
00147 
00148 /* internal definitions ----------------------------------------------------- */
00149 /* Marker values that UTF_IS_ERROR() reports as errors and UTF_IS_VALID() rejects; presumably produced by the UTF-8 macros for ill-formed input - confirm in utf8.h. */
00162 #define UTF8_ERROR_VALUE_1 0x15
00163 #define UTF8_ERROR_VALUE_2 0x9f
00164 /* Generic error marker 0xffff; it satisfies the ((c)&0xfffe)==0xfffe test in UTF_IS_ERROR(). */
00169 #define UTF_ERROR_VALUE 0xffff
00170 
00171 /* single-code point definitions -------------------------------------------- */
00172 /* True when uchar lies in the surrogate range 0xd800..0xdfff: the mask 0xfffff800 clears the low 11 bits before the comparison with 0xd800. */
00174 #define UTF_IS_SURROGATE(uchar) (((uchar)&0xfffff800)==0xd800)
00175 /* True when c is at most 0x10ffff, is not a surrogate (0xd800..0xdfff), does not have 0xfffe or 0xffff in its low 16 bits, and is outside 0xfdd0..0xfdef. Evaluates c several times, so pass an expression without side effects. */
00189 #define UTF_IS_UNICODE_CHAR(c) \
00190     ((uint32_t)(c)<0xd800 || \
00191         ((uint32_t)(c)>0xdfff && \
00192          (uint32_t)(c)<=0x10ffff && \
00193          ((c)&0xfffe)!=0xfffe && \
00194          !(0xfdd0<=(uint32_t)(c) && (uint32_t)(c)<=0xfdef)))
00195 /* True when the low 16 bits of c are 0xfffe or 0xffff (higher bits are ignored), or when c equals UTF8_ERROR_VALUE_1 or UTF8_ERROR_VALUE_2. Evaluates c up to three times. */
00200 #define UTF_IS_ERROR(c) \
00201     (((c)&0xfffe)==0xfffe || (c)==UTF8_ERROR_VALUE_1 || (c)==UTF8_ERROR_VALUE_2)
00202 /* True when c passes UTF_IS_UNICODE_CHAR() and is neither UTF8_ERROR_VALUE_1 nor UTF8_ERROR_VALUE_2. Evaluates c several times. */
00204 #define UTF_IS_VALID(c) \
00205     (UTF_IS_UNICODE_CHAR(c) && \
00206      (c)!=UTF8_ERROR_VALUE_1 && (c)!=UTF8_ERROR_VALUE_2)
00207 
00208 /* include the utfXX.h ------------------------------------------------------ */
00209 
00210 #include "unicode/utf8.h"
00211 #include "unicode/utf16.h"
00212 #include "unicode/utf32.h"
00213 
00214 /* Define types and macros according to the selected UTF size. -------------- */
00215 /* Choose the UChar typedef and bind the generic UTF_ macro names for the configured UTF_SIZE; only the 16 branch gets past #error. */
00222 #if UTF_SIZE==8
00223 /* UTF_SIZE==8: rejected with #error; the typedef selection that follows is kept in the file but is not part of any successful build. */
00224 #   error UTF-8 is not implemented, undefine UTF_SIZE or define it to 16
00225 
00226 /*
00227  * ANSI C header:
00228  * limits.h defines CHAR_MAX
00229  */
00230 #   include <limits.h>
00231 
00232     /* Define UChar to be compatible with char if possible. */
00233 #   if CHAR_MAX>=255
00234         typedef char UChar;
00235 #   else
00236         typedef uint8_t UChar;
00237 #   endif
00238 
00239 #elif UTF_SIZE==16
00240 /* UTF_SIZE==16: the only implemented configuration. */
00241     /* Define UChar to be compatible with wchar_t if possible. */
00242 #   if U_SIZEOF_WCHAR_T==2
00243         typedef wchar_t UChar;
00244 #   else
00245         typedef uint16_t UChar;
00246 #   endif
00247 /* Each generic UTF_ macro below forwards its arguments unchanged to the UTF16_ macro with the same suffix. */
00249 #   define UTF_IS_SINGLE(uchar)                         UTF16_IS_SINGLE(uchar)
00250 
00251 #   define UTF_IS_LEAD(uchar)                           UTF16_IS_LEAD(uchar)
00252 
00253 #   define UTF_IS_TRAIL(uchar)                          UTF16_IS_TRAIL(uchar)
00254 
00256 #   define UTF_NEED_MULTIPLE_UCHAR(c)                   UTF16_NEED_MULTIPLE_UCHAR(c)
00257 
00258 #   define UTF_CHAR_LENGTH(c)                           UTF16_CHAR_LENGTH(c)
00259 
00260 #   define UTF_MAX_CHAR_LENGTH                          UTF16_MAX_CHAR_LENGTH
00261 
00262 #   define UTF_ARRAY_SIZE(size)                         UTF16_ARRAY_SIZE(size)
00263 
00265 #   define UTF_GET_CHAR_UNSAFE(s, i, c)                 UTF16_GET_CHAR_UNSAFE(s, i, c)
00266 
00267 #   define UTF_GET_CHAR_SAFE(s, start, i, length, c, strict) UTF16_GET_CHAR_SAFE(s, start, i, length, c, strict)
00268 
00270 #   define UTF_NEXT_CHAR_UNSAFE(s, i, c)                UTF16_NEXT_CHAR_UNSAFE(s, i, c)
00271 
00272 #   define UTF_NEXT_CHAR_SAFE(s, i, length, c, strict)  UTF16_NEXT_CHAR_SAFE(s, i, length, c, strict)
00273 
00275 #   define UTF_APPEND_CHAR_UNSAFE(s, i, c)              UTF16_APPEND_CHAR_UNSAFE(s, i, c)
00276 
00277 #   define UTF_APPEND_CHAR_SAFE(s, i, length, c)        UTF16_APPEND_CHAR_SAFE(s, i, length, c)
00278 
00280 #   define UTF_FWD_1_UNSAFE(s, i)                       UTF16_FWD_1_UNSAFE(s, i)
00281 
00282 #   define UTF_FWD_1_SAFE(s, i, length)                 UTF16_FWD_1_SAFE(s, i, length)
00283 
00285 #   define UTF_FWD_N_UNSAFE(s, i, n)                    UTF16_FWD_N_UNSAFE(s, i, n)
00286 
00287 #   define UTF_FWD_N_SAFE(s, i, length, n)              UTF16_FWD_N_SAFE(s, i, length, n)
00288 
00290 #   define UTF_SET_CHAR_START_UNSAFE(s, i)              UTF16_SET_CHAR_START_UNSAFE(s, i)
00291 
00292 #   define UTF_SET_CHAR_START_SAFE(s, start, i)         UTF16_SET_CHAR_START_SAFE(s, start, i)
00293 
00295 #   define UTF_PREV_CHAR_UNSAFE(s, i, c)                UTF16_PREV_CHAR_UNSAFE(s, i, c)
00296 
00297 #   define UTF_PREV_CHAR_SAFE(s, start, i, c, strict)   UTF16_PREV_CHAR_SAFE(s, start, i, c, strict)
00298 
00300 #   define UTF_BACK_1_UNSAFE(s, i)                      UTF16_BACK_1_UNSAFE(s, i)
00301 
00302 #   define UTF_BACK_1_SAFE(s, start, i)                 UTF16_BACK_1_SAFE(s, start, i)
00303 
00305 #   define UTF_BACK_N_UNSAFE(s, i, n)                   UTF16_BACK_N_UNSAFE(s, i, n)
00306 
00307 #   define UTF_BACK_N_SAFE(s, start, i, n)              UTF16_BACK_N_SAFE(s, start, i, n)
00308 
00310 #   define UTF_SET_CHAR_LIMIT_UNSAFE(s, i)              UTF16_SET_CHAR_LIMIT_UNSAFE(s, i)
00311 
00312 #   define UTF_SET_CHAR_LIMIT_SAFE(s, start, i, length) UTF16_SET_CHAR_LIMIT_SAFE(s, start, i, length)
00313 
00314 #elif UTF_SIZE==32
00315 /* UTF_SIZE==32: rejected with #error as well; UChar would be the same type as UChar32. */
00316 #   error UTF-32 is not implemented, undefine UTF_SIZE or define it to 16
00317 
00318     typedef UChar32 UChar;
00319 
00320 #else
00321 #   error UTF_SIZE must be undefined or one of { 8, 16, 32 } - only 16 is implemented
00322 #endif
00323 
00324 /* Define the default macros for handling UTF characters. ------------------- */
00325 /* Bind the default UTF_ macros to the _SAFE or _UNSAFE variants according to which of UTF_SAFE, UTF_STRICT or UTF_UNSAFE is defined. */
00433 #ifdef UTF_SAFE
00434 /* UTF_SAFE: use the _SAFE variants and pass FALSE wherever they take a strict argument. */
00435 #   define UTF_GET_CHAR(s, start, i, length, c) UTF_GET_CHAR_SAFE(s, start, i, length, c, FALSE)
00436 
00437 #   define UTF_NEXT_CHAR(s, i, length, c)       UTF_NEXT_CHAR_SAFE(s, i, length, c, FALSE)
00438 #   define UTF_APPEND_CHAR(s, i, length, c)     UTF_APPEND_CHAR_SAFE(s, i, length, c)
00439 #   define UTF_FWD_1(s, i, length)              UTF_FWD_1_SAFE(s, i, length)
00440 #   define UTF_FWD_N(s, i, length, n)           UTF_FWD_N_SAFE(s, i, length, n)
00441 #   define UTF_SET_CHAR_START(s, start, i)      UTF_SET_CHAR_START_SAFE(s, start, i)
00442 
00443 #   define UTF_PREV_CHAR(s, start, i, c)        UTF_PREV_CHAR_SAFE(s, start, i, c, FALSE)
00444 #   define UTF_BACK_1(s, start, i)              UTF_BACK_1_SAFE(s, start, i)
00445 #   define UTF_BACK_N(s, start, i, n)           UTF_BACK_N_SAFE(s, start, i, n)
00446 #   define UTF_SET_CHAR_LIMIT(s, start, i, length) UTF_SET_CHAR_LIMIT_SAFE(s, start, i, length)
00447 
00448 #elif defined(UTF_STRICT)
00449 /* UTF_STRICT: the same _SAFE variants as above, but TRUE is passed wherever they take a strict argument. */
00450 #   define UTF_GET_CHAR(s, start, i, length, c) UTF_GET_CHAR_SAFE(s, start, i, length, c, TRUE)
00451 
00452 #   define UTF_NEXT_CHAR(s, i, length, c)       UTF_NEXT_CHAR_SAFE(s, i, length, c, TRUE)
00453 #   define UTF_APPEND_CHAR(s, i, length, c)     UTF_APPEND_CHAR_SAFE(s, i, length, c)
00454 #   define UTF_FWD_1(s, i, length)              UTF_FWD_1_SAFE(s, i, length)
00455 #   define UTF_FWD_N(s, i, length, n)           UTF_FWD_N_SAFE(s, i, length, n)
00456 #   define UTF_SET_CHAR_START(s, start, i)      UTF_SET_CHAR_START_SAFE(s, start, i)
00457 
00458 #   define UTF_PREV_CHAR(s, start, i, c)        UTF_PREV_CHAR_SAFE(s, start, i, c, TRUE)
00459 #   define UTF_BACK_1(s, start, i)              UTF_BACK_1_SAFE(s, start, i)
00460 #   define UTF_BACK_N(s, start, i, n)           UTF_BACK_N_SAFE(s, start, i, n)
00461 #   define UTF_SET_CHAR_LIMIT(s, start, i, length) UTF_SET_CHAR_LIMIT_SAFE(s, start, i, length)
00462 
00463 #else /* UTF_UNSAFE */
00464 /* UTF_UNSAFE: the macros keep the same parameter lists, but start and length are dropped because the _UNSAFE variants do not take them. */
00465 #   define UTF_GET_CHAR(s, start, i, length, c) UTF_GET_CHAR_UNSAFE(s, i, c)
00466 
00467 #   define UTF_NEXT_CHAR(s, i, length, c)       UTF_NEXT_CHAR_UNSAFE(s, i, c)
00468 #   define UTF_APPEND_CHAR(s, i, length, c)     UTF_APPEND_CHAR_UNSAFE(s, i, c)
00469 #   define UTF_FWD_1(s, i, length)              UTF_FWD_1_UNSAFE(s, i)
00470 #   define UTF_FWD_N(s, i, length, n)           UTF_FWD_N_UNSAFE(s, i, n)
00471 #   define UTF_SET_CHAR_START(s, start, i)      UTF_SET_CHAR_START_UNSAFE(s, i)
00472 
00473 #   define UTF_PREV_CHAR(s, start, i, c)        UTF_PREV_CHAR_UNSAFE(s, i, c)
00474 #   define UTF_BACK_1(s, start, i)              UTF_BACK_1_UNSAFE(s, i)
00475 #   define UTF_BACK_N(s, start, i, n)           UTF_BACK_N_UNSAFE(s, i, n)
00476 #   define UTF_SET_CHAR_LIMIT(s, start, i, length) UTF_SET_CHAR_LIMIT_UNSAFE(s, i)
00477 
00478 #endif
00479 
00480 #endif

Generated on Mon Dec 3 19:00:28 2001 for ICU 2.0 by doxygen 1.2.11.1, written by Dimitri van Heesch, © 1997-2001