Main Page   Class Hierarchy   Alphabetical List   Compound List   File List   Compound Members   File Members   Search  

utf_old.h

Go to the documentation of this file.
00001 /*
00002 *******************************************************************************
00003 *
00004 *   Copyright (C) 2002, International Business Machines
00005 *   Corporation and others.  All Rights Reserved.
00006 *
00007 *******************************************************************************
00008 *   file name:  utf_old.h
00009 *   encoding:   US-ASCII
00010 *   tab size:   8 (not used)
00011 *   indentation:4
00012 *
00013 *   created on: 2002sep21
00014 *   created by: Markus W. Scherer
00015 */
00016 
00140 /* utf.h must be included first. */
00141 #ifndef __UTF_H__
00142 #   include "unicode/utf.h"
00143 #endif
00144 
00145 #ifndef __UTF_OLD_H__
00146 #define __UTF_OLD_H__
00147 
00148 /* Formerly utf.h, part 1 --------------------------------------------------- */
00149 
/* Index/offset type offered alongside the old UTF macros; a plain alias for int32_t. */
00157 typedef int32_t UTextOffset;
00158 
/* Set to 16; the unsuffixed UTF_ macros further down in this header all map to
 * UTF-16 implementations. */
00160 #define UTF_SIZE 16
00161 
/* UTF_SAFE is defined (empty) while UTF_UNSAFE and UTF_STRICT are explicitly
 * undefined. No line visible in this listing tests any of the three symbols;
 * presumably they are kept for client code that checks them -- confirm before removing. */
00168 #define UTF_SAFE
00169 
00170 #undef UTF_UNSAFE
00171 
00172 #undef UTF_STRICT
00173 
/* Error sentinel stored into c by UTF8_NEXT_CHAR_SAFE and UTF8_PREV_CHAR_SAFE when the
 * byte they read is >=0x80 but is not of the kind that can start (forward) or end
 * (backward) a sequence. */
00186 #define UTF8_ERROR_VALUE_1 0x15
00187 
/* Second UTF-8 error sentinel. Within this header it is only referenced by
 * UTF_IS_ERROR and UTF_IS_VALID. */
00193 #define UTF8_ERROR_VALUE_2 0x9f
00194 
/* Error sentinel written by the UTF-16 and UTF-32 "safe" macros in this header. */
00201 #define UTF_ERROR_VALUE 0xffff
00202 
/* True if the low 16 bits of c are 0xfffe or 0xffff (higher bits are ignored by the
 * mask), or if c equals one of the two UTF-8 error values.
 * c may be evaluated up to three times. */
00209 #define UTF_IS_ERROR(c) \
00210     (((c)&0xfffe)==0xfffe || (c)==UTF8_ERROR_VALUE_1 || (c)==UTF8_ERROR_VALUE_2)
00211 
/* True if c passes UTF_IS_UNICODE_CHAR and is neither of the two UTF-8 error values.
 * c is evaluated several times. */
00217 #define UTF_IS_VALID(c) \
00218     (UTF_IS_UNICODE_CHAR(c) && \
00219      (c)!=UTF8_ERROR_VALUE_1 && (c)!=UTF8_ERROR_VALUE_2)
00220 
/* True if uchar lies in 0xd800..0xdfff: everything above the low 11 bits must equal 0xd800. */
00225 #define UTF_IS_SURROGATE(uchar) (((uchar)&0xfffff800)==0xd800)
00226 
/* True for c in 0xfdd0..0xfdef, and for any c from 0xfffe up to 0x10ffff whose low
 * 16 bits are 0xfffe or 0xffff. The first comparison is done in c's own type, the
 * others after a cast to uint32_t. c is evaluated several times. */
00232 #define UTF_IS_UNICODE_NONCHAR(c) \
00233     ((c)>=0xfdd0 && \
00234      ((uint32_t)(c)<=0xfdef || ((c)&0xfffe)==0xfffe) && \
00235      (uint32_t)(c)<=0x10ffff)
00236 
/* True if c is in 0..0xd7ff, or in 0xe000..0x10ffff and not flagged by
 * UTF_IS_UNICODE_NONCHAR. Range checks use uint32_t, so negative values fail them.
 * c is evaluated several times. */
00252 #define UTF_IS_UNICODE_CHAR(c) \
00253     ((uint32_t)(c)<0xd800 || \
00254         ((uint32_t)(c)>0xdfff && \
00255          (uint32_t)(c)<=0x10ffff && \
00256          !UTF_IS_UNICODE_NONCHAR(c)))
00257 
00258 /* Formerly utf8.h ---------------------------------------------------------- */
00259 
/* Number of trail bytes for a UTF-8 lead byte, looked up in the table
 * utf8_countTrailBytes[] (not declared in this listing). The argument is cast to
 * uint8_t before indexing; it is parenthesized so that an expression argument such
 * as `a+b` is cast as a whole instead of only its first operand. */
00264 #define UTF8_COUNT_TRAIL_BYTES(leadByte) (utf8_countTrailBytes[(uint8_t)(leadByte)])
00265 
/* Clears the length-marker bits of a lead byte in place, keeping its low
 * (6-countTrailBytes) bits. leadByte is assigned to (&=), so it must be a modifiable lvalue. */
00270 #define UTF8_MASK_LEAD_BYTE(leadByte, countTrailBytes) ((leadByte)&=(1<<(6-(countTrailBytes)))-1)
00271 
/* True if bit 7 of uchar is clear (value encodes a code point by itself). */
00273 #define UTF8_IS_SINGLE(uchar) (((uchar)&0x80)==0)
00274 
/* True if uchar is in 0xc0..0xfd; the subtraction result is truncated to uint8_t
 * so that values below 0xc0 wrap around and fail the comparison. */
00275 #define UTF8_IS_LEAD(uchar) ((uint8_t)((uchar)-0xc0)<0x3e)
00276 
/* True if the bits selected by 0xc0 equal 0x80 (byte values 0x80..0xbf). */
00277 #define UTF8_IS_TRAIL(uchar) (((uchar)&0xc0)==0x80)
00278 
/* True if c, compared as uint32_t, is greater than 0x7f. */
00280 #define UTF8_NEED_MULTIPLE_UCHAR(c) ((uint32_t)(c)>0x7f)
00281 
/* Number of UTF-8 bytes used for code point c.
 * Active branch (#if 1): 1 for c<=0x7f, 2 for c<=0x7ff, 4 for 0x10000..0x10ffff,
 * and 3 for everything else (which includes values above 0x10ffff).
 * The disabled #else branch is a 1..6-byte variant covering values up to 0x7fffffff. */
00295 #if 1
00296 #   define UTF8_CHAR_LENGTH(c) \
00297         ((uint32_t)(c)<=0x7f ? 1 : \
00298             ((uint32_t)(c)<=0x7ff ? 2 : \
00299                 ((uint32_t)((c)-0x10000)>0xfffff ? 3 : 4) \
00300             ) \
00301         )
00302 #else
00303 #   define UTF8_CHAR_LENGTH(c) \
00304         ((uint32_t)(c)<=0x7f ? 1 : \
00305             ((uint32_t)(c)<=0x7ff ? 2 : \
00306                 ((uint32_t)(c)<=0xffff ? 3 : \
00307                     ((uint32_t)(c)<=0x10ffff ? 4 : \
00308                         ((uint32_t)(c)<=0x3ffffff ? 5 : \
00309                             ((uint32_t)(c)<=0x7fffffff ? 6 : 3) \
00310                         ) \
00311                     ) \
00312                 ) \
00313             ) \
00314         )
00315 #endif
00316 
/* Largest value the active UTF8_CHAR_LENGTH branch can return. */
00318 #define UTF8_MAX_CHAR_LENGTH 4
00319 
/* Scales size by 5/2 using integer arithmetic (multiply first, then divide). */
00321 #define UTF8_ARRAY_SIZE(size) ((5*(size))/2)
00322 
/* Random-access read: copies i into a local index, moves that copy back to the start
 * of the sequence with UTF8_SET_CHAR_START_UNSAFE, then decodes into c with
 * UTF8_NEXT_CHAR_UNSAFE. The caller's i is not modified. Expands to a braced block. */
00324 #define UTF8_GET_CHAR_UNSAFE(s, i, c) { \
00325     int32_t __I=(int32_t)(i); \
00326     UTF8_SET_CHAR_START_UNSAFE(s, __I); \
00327     UTF8_NEXT_CHAR_UNSAFE(s, __I, c); \
00328 }
00329 
/* Random-access read with bounds: works on a local copy of i, which is moved to the
 * sequence start (not before `start`) and then decoded into c with
 * UTF8_NEXT_CHAR_SAFE using `length` and `strict`. The caller's i is not modified. */
00331 #define UTF8_GET_CHAR_SAFE(s, start, i, length, c, strict) { \
00332     int32_t __I=(int32_t)(i); \
00333     UTF8_SET_CHAR_START_SAFE(s, start, __I); \
00334     UTF8_NEXT_CHAR_SAFE(s, __I, length, c, strict); \
00335 }
00336 
/* Reads s[i] into c and post-increments i. If that byte is in 0xc0..0xf4 it is treated
 * as a lead byte: its marker bits are masked off and, for a looked-up trail count of
 * 1..3, that many following bytes are folded in 6 bits at a time through deliberate
 * switch fall-through, advancing i each time. Any other first byte is left in c as is.
 * No bounds check and no validation of the trail bytes. */
00338 #define UTF8_NEXT_CHAR_UNSAFE(s, i, c) { \
00339     (c)=(s)[(i)++]; \
00340     if((uint8_t)((c)-0xc0)<0x35) { \
00341         uint8_t __count=UTF8_COUNT_TRAIL_BYTES(c); \
00342         UTF8_MASK_LEAD_BYTE(c, __count); \
00343         switch(__count) { \
00344         /* each following branch falls through to the next one */ \
00345         case 3: \
00346             (c)=((c)<<6)|((s)[(i)++]&0x3f); \
00347         case 2: \
00348             (c)=((c)<<6)|((s)[(i)++]&0x3f); \
00349         case 1: \
00350             (c)=((c)<<6)|((s)[(i)++]&0x3f); \
00351         /* no other branches to optimize switch() */ \
00352             break; \
00353         } \
00354     } \
00355 }
00356 
/* Writes c at s[i] as 1, 2, 3 or 4 bytes, selected by the thresholds 0x7f, 0x7ff and
 * 0xffff, post-incrementing i for every byte written. The nested if/else shares the
 * code for the trailing bytes: the last statements of the outer branches emit the
 * final one or two 10xxxxxx bytes. There is no capacity check and no validation of c. */
00358 #define UTF8_APPEND_CHAR_UNSAFE(s, i, c) { \
00359     if((uint32_t)(c)<=0x7f) { \
00360         (s)[(i)++]=(uint8_t)(c); \
00361     } else { \
00362         if((uint32_t)(c)<=0x7ff) { \
00363             (s)[(i)++]=(uint8_t)(((c)>>6)|0xc0); \
00364         } else { \
00365             if((uint32_t)(c)<=0xffff) { \
00366                 (s)[(i)++]=(uint8_t)(((c)>>12)|0xe0); \
00367             } else { \
00368                 (s)[(i)++]=(uint8_t)(((c)>>18)|0xf0); \
00369                 (s)[(i)++]=(uint8_t)((((c)>>12)&0x3f)|0x80); \
00370             } \
00371             (s)[(i)++]=(uint8_t)((((c)>>6)&0x3f)|0x80); \
00372         } \
00373         (s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80); \
00374     } \
00375 }
00376 
/* Advances i by one plus the trail-byte count looked up for s[i]. No bounds check. */
00378 #define UTF8_FWD_1_UNSAFE(s, i) { \
00379     (i)+=1+UTF8_COUNT_TRAIL_BYTES((s)[i]); \
00380 }
00381 
/* Applies UTF8_FWD_1_UNSAFE to (s, i) n times. n is evaluated once, into a local
 * int32_t counter; a count of zero or less does nothing. */
00383 #define UTF8_FWD_N_UNSAFE(s, i, n) { \
00384     int32_t __N; \
00385     for(__N=(n); __N>0; --__N) { \
00386         UTF8_FWD_1_UNSAFE(s, i); \
00387     } \
00388 }
00390 
/* Decrements i for as long as s[i] is a trail byte. No lower bound is checked. */
00392 #define UTF8_SET_CHAR_START_UNSAFE(s, i) { \
00393     while(UTF8_IS_TRAIL((s)[i])) { --(i); } \
00394 }
00395 
/* Reads s[i] into c and post-increments i. Values below 0x80 are kept as is. A lead
 * byte (per UTF8_IS_LEAD) is handed to utf8_nextCharSafeBody() -- not defined in this
 * listing -- together with &i, length and strict, and c takes its return value.
 * Any other byte >=0x80 sets c to UTF8_ERROR_VALUE_1. */
00397 #define UTF8_NEXT_CHAR_SAFE(s, i, length, c, strict) { \
00398     (c)=(s)[(i)++]; \
00399     if((c)>=0x80) { \
00400         if(UTF8_IS_LEAD(c)) { \
00401             (c)=utf8_nextCharSafeBody(s, &(i), (int32_t)(length), c, strict); \
00402         } else { \
00403             (c)=UTF8_ERROR_VALUE_1; \
00404         } \
00405     } \
00406 }
00407 
/* Appends c at s[i]. A value <=0x7f is stored directly (this path does not look at
 * length). Anything else is delegated to utf8_appendCharSafeBody() -- not defined in
 * this listing -- with NULL as its last argument, and i is replaced by its return value. */
00409 #define UTF8_APPEND_CHAR_SAFE(s, i, length, c)  { \
00410     if((uint32_t)(c)<=0x7f) { \
00411         (s)[(i)++]=(uint8_t)(c); \
00412     } else { \
00413         (i)=utf8_appendCharSafeBody(s, (int32_t)(i), (int32_t)(length), c, NULL); \
00414     } \
00415 }
00416 
/* The three macros below forward their arguments unchanged to U8_FWD_1, U8_FWD_N and
 * U8_SET_CP_START. Those are not defined in this header; presumably they come from
 * the utf.h include at the top -- confirm. */
00418 #define UTF8_FWD_1_SAFE(s, i, length) U8_FWD_1(s, i, length)
00419 
00421 #define UTF8_FWD_N_SAFE(s, i, length, n) U8_FWD_N(s, i, length, n)
00422 
00424 #define UTF8_SET_CHAR_START_SAFE(s, start, i) U8_SET_CP_START(s, start, i)
00425 
/* Pre-decrements i and reads s[i] into c. If that byte is a trail byte, the macro keeps
 * stepping backward: each further byte below 0xc0 contributes its low 6 bits at a shift
 * that grows by 6, until a byte >=0xc0 is found. That byte is masked with
 * UTF8_MASK_LEAD_BYTE using the number of trail bytes seen so far and merged in as the
 * top bits. There is no lower bound check on i. */
00427 #define UTF8_PREV_CHAR_UNSAFE(s, i, c) { \
00428     (c)=(s)[--(i)]; \
00429     if(UTF8_IS_TRAIL(c)) { \
00430         uint8_t __b, __count=1, __shift=6; \
00431 \
00432         /* c is a trail byte */ \
00433         (c)&=0x3f; \
00434         for(;;) { \
00435             __b=(s)[--(i)]; \
00436             if(__b>=0xc0) { \
00437                 UTF8_MASK_LEAD_BYTE(__b, __count); \
00438                 (c)|=(UChar32)__b<<__shift; \
00439                 break; \
00440             } else { \
00441                 (c)|=(UChar32)(__b&0x3f)<<__shift; \
00442                 ++__count; \
00443                 __shift+=6; \
00444             } \
00445         } \
00446     } \
00447 }
00448 
/* Pre-decrements i at least once and repeats while the byte at the new i is a trail
 * byte. No lower bound is checked. */
00450 #define UTF8_BACK_1_UNSAFE(s, i) { \
00451     while(UTF8_IS_TRAIL((s)[--(i)])) {} \
00452 }
00453 
/* Applies UTF8_BACK_1_UNSAFE to (s, i) n times. n is evaluated once, into a local
 * int32_t counter; a count of zero or less does nothing. */
00455 #define UTF8_BACK_N_UNSAFE(s, i, n) { \
00456     int32_t __N; \
00457     for(__N=(n); __N>0; --__N) { \
00458         UTF8_BACK_1_UNSAFE(s, i); \
00459     } \
00460 }
00462 
/* Implemented as one UTF8_BACK_1_UNSAFE followed by one UTF8_FWD_1_UNSAFE on (s, i). */
00464 #define UTF8_SET_CHAR_LIMIT_UNSAFE(s, i) { \
00465     UTF8_BACK_1_UNSAFE(s, i); \
00466     UTF8_FWD_1_UNSAFE(s, i); \
00467 }
00468 
/* Pre-decrements i and reads s[i] into c. Values below 0x80 are kept as is. A value in
 * 0x80..0xbf is handed to utf8_prevCharSafeBody() -- not defined in this listing --
 * together with start, &i and strict, and c takes its return value. A value above 0xbf
 * sets c to UTF8_ERROR_VALUE_1. */
00470 #define UTF8_PREV_CHAR_SAFE(s, start, i, c, strict) { \
00471     (c)=(s)[--(i)]; \
00472     if((c)>=0x80) { \
00473         if((c)<=0xbf) { \
00474             (c)=utf8_prevCharSafeBody(s, start, &(i), c, strict); \
00475         } else { \
00476             (c)=UTF8_ERROR_VALUE_1; \
00477         } \
00478     } \
00479 }
00480 
/* The three macros below forward their arguments unchanged to U8_BACK_1, U8_BACK_N and
 * U8_SET_CP_LIMIT. Those are not defined in this header; presumably they come from
 * the utf.h include at the top -- confirm. */
00482 #define UTF8_BACK_1_SAFE(s, start, i) U8_BACK_1(s, start, i)
00483 
00485 #define UTF8_BACK_N_SAFE(s, start, i, n) U8_BACK_N(s, start, i, n)
00486 
00488 #define UTF8_SET_CHAR_LIMIT_SAFE(s, start, i, length) U8_SET_CP_LIMIT(s, start, i, length)
00489 
00490 /* Formerly utf16.h --------------------------------------------------------- */
00491 
/* True if uchar is in 0xd800..0xdbff (all bits above the low 10 equal 0xd800). */
00493 #define UTF_IS_FIRST_SURROGATE(uchar) (((uchar)&0xfffffc00)==0xd800)
00494 
/* True if uchar is in 0xdc00..0xdfff (all bits above the low 10 equal 0xdc00). */
00496 #define UTF_IS_SECOND_SURROGATE(uchar) (((uchar)&0xfffffc00)==0xdc00)
00497 
/* Tests only bit 10 of c (clear means "first"). It does not check that c is a
 * surrogate at all; the users in this header apply it after UTF_IS_SURROGATE(c). */
00499 #define UTF_IS_SURROGATE_FIRST(c) (((c)&0x400)==0)
00500 
/* Combined bias of a surrogate pair: (0xd800<<10) + 0xdc00 - 0x10000. Subtracting it
 * from (first<<10)+second is the same as
 * ((first-0xd800)<<10) + (second-0xdc00) + 0x10000. */
00502 #define UTF_SURROGATE_OFFSET ((0xd800<<10UL)+0xdc00-0x10000)
00503 
/* Combines two surrogate code units into one value using the offset above.
 * The arguments are not checked for being surrogates. */
00505 #define UTF16_GET_PAIR_VALUE(first, second) \
00506     (((first)<<10UL)+(second)-UTF_SURROGATE_OFFSET)
00507 
/* First surrogate for a supplementary value: (supplementary>>10)+0xd7c0, converted to
 * UChar. The input range is not checked. The whole replacement list is parenthesized
 * so the cast expression stays one unit wherever the macro is expanded. */
00509 #define UTF_FIRST_SURROGATE(supplementary) ((UChar)(((supplementary)>>10)+0xd7c0))
00510 
/* Second surrogate for a supplementary value: low 10 bits OR-ed with 0xdc00,
 * converted to UChar. The input range is not checked. */
00512 #define UTF_SECOND_SURROGATE(supplementary) ((UChar)(((supplementary)&0x3ff)|0xdc00))
00513 
/* UTF16_-prefixed names for the two macros above. */
00515 #define UTF16_LEAD(supplementary) UTF_FIRST_SURROGATE(supplementary)
00516 
00518 #define UTF16_TRAIL(supplementary) UTF_SECOND_SURROGATE(supplementary)
00519 
/* True if uchar is not in the surrogate range (negation of UTF_IS_SURROGATE).
 * The replacement list is parenthesized as a whole so the negation stays one unit
 * wherever the macro is expanded. */
00521 #define UTF16_IS_SINGLE(uchar) (!UTF_IS_SURROGATE(uchar))
00522 
/* UTF16_-prefixed names for UTF_IS_FIRST_SURROGATE and UTF_IS_SECOND_SURROGATE. */
00524 #define UTF16_IS_LEAD(uchar) UTF_IS_FIRST_SURROGATE(uchar)
00525 
00527 #define UTF16_IS_TRAIL(uchar) UTF_IS_SECOND_SURROGATE(uchar)
00528 
/* True if c, compared as uint32_t, is greater than 0xffff. */
00530 #define UTF16_NEED_MULTIPLE_UCHAR(c) ((uint32_t)(c)>0xffff)
00531 
/* 1 if c (as uint32_t) is <=0xffff, otherwise 2. No upper limit is checked. */
00533 #define UTF16_CHAR_LENGTH(c) ((uint32_t)(c)<=0xffff ? 1 : 2)
00534 
/* Largest value UTF16_CHAR_LENGTH can return. */
00536 #define UTF16_MAX_CHAR_LENGTH 2
00537 
/* Identity: yields size unchanged. */
00539 #define UTF16_ARRAY_SIZE(size) (size)
00540 
/* Random-access read: c = s[i]; if c is a surrogate it is combined with the neighbor
 * s[i+1] (when bit 10 of c marks it as first) or s[i-1] (otherwise) without checking
 * that neighbor or the array bounds. i is not modified. */
00552 #define UTF16_GET_CHAR_UNSAFE(s, i, c) { \
00553     (c)=(s)[i]; \
00554     if(UTF_IS_SURROGATE(c)) { \
00555         if(UTF_IS_SURROGATE_FIRST(c)) { \
00556             (c)=UTF16_GET_PAIR_VALUE((c), (s)[(i)+1]); \
00557         } else { \
00558             (c)=UTF16_GET_PAIR_VALUE((s)[(i)-1], (c)); \
00559         } \
00560     } \
00561 }
00562 
/* Random-access read with bounds; i is not modified.
 * - c = s[i]. If c is a first surrogate and i+1<length holds a second surrogate, or c is
 *   a second surrogate and i-1>=start holds a first surrogate, c becomes the pair value.
 * - An unpaired surrogate becomes UTF_ERROR_VALUE only when strict is true; otherwise
 *   the surrogate value is left in c.
 * - A non-surrogate becomes UTF_ERROR_VALUE when strict is true and it fails
 *   UTF_IS_UNICODE_CHAR. */
00564 #define UTF16_GET_CHAR_SAFE(s, start, i, length, c, strict) { \
00565     (c)=(s)[i]; \
00566     if(UTF_IS_SURROGATE(c)) { \
00567         uint16_t __c2; \
00568         if(UTF_IS_SURROGATE_FIRST(c)) { \
00569             if((i)+1<(length) && UTF_IS_SECOND_SURROGATE(__c2=(s)[(i)+1])) { \
00570                 (c)=UTF16_GET_PAIR_VALUE((c), __c2); \
00571                 /* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() and UTF_IS_UNICODE_CHAR() */ \
00572             } else if(strict) {\
00573                 /* unmatched first surrogate */ \
00574                 (c)=UTF_ERROR_VALUE; \
00575             } \
00576         } else { \
00577             if((i)-1>=(start) && UTF_IS_FIRST_SURROGATE(__c2=(s)[(i)-1])) { \
00578                 (c)=UTF16_GET_PAIR_VALUE(__c2, (c)); \
00579                 /* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() and UTF_IS_UNICODE_CHAR() */ \
00580             } else if(strict) {\
00581                 /* unmatched second surrogate */ \
00582                 (c)=UTF_ERROR_VALUE; \
00583             } \
00584         } \
00585     } else if((strict) && !UTF_IS_UNICODE_CHAR(c)) { \
00586         (c)=UTF_ERROR_VALUE; \
00587     } \
00588 }
00589 
/* Reads s[i] into c and post-increments i; if c is a first surrogate, the next unit is
 * consumed as well and combined into the pair value, without checking it or the bounds. */
00591 #define UTF16_NEXT_CHAR_UNSAFE(s, i, c) { \
00592     (c)=(s)[(i)++]; \
00593     if(UTF_IS_FIRST_SURROGATE(c)) { \
00594         (c)=UTF16_GET_PAIR_VALUE((c), (s)[(i)++]); \
00595     } \
00596 }
00597 
/* Writes c at s[i] as one unit (c<=0xffff) or as two surrogate units, post-incrementing
 * i per unit. No capacity check and no validation of c. */
00599 #define UTF16_APPEND_CHAR_UNSAFE(s, i, c) { \
00600     if((uint32_t)(c)<=0xffff) { \
00601         (s)[(i)++]=(uint16_t)(c); \
00602     } else { \
00603         (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \
00604         (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \
00605     } \
00606 }
00607 
/* Advances i by one unit, and by a second one if the unit just passed was a first
 * surrogate. The following unit is not inspected and no bound is checked. */
00609 #define UTF16_FWD_1_UNSAFE(s, i) { \
00610     if(UTF_IS_FIRST_SURROGATE((s)[(i)++])) { \
00611         ++(i); \
00612     } \
00613 }
00614 
/* Applies UTF16_FWD_1_UNSAFE to (s, i) n times. n is evaluated once, into a local
 * int32_t counter; a count of zero or less does nothing. */
00616 #define UTF16_FWD_N_UNSAFE(s, i, n) { \
00617     int32_t __N; \
00618     for(__N=(n); __N>0; --__N) { \
00619         UTF16_FWD_1_UNSAFE(s, i); \
00620     } \
00621 }
00623 
/* Decrements i once if s[i] is a second surrogate. The preceding unit is not
 * inspected and no lower bound is checked. */
00625 #define UTF16_SET_CHAR_START_UNSAFE(s, i) { \
00626     if(UTF_IS_SECOND_SURROGATE((s)[i])) { \
00627         --(i); \
00628     } \
00629 }
00630 
/* Reads s[i] into c and post-increments i.
 * - First surrogate followed (within length) by a second surrogate: that unit is
 *   consumed too and c becomes the pair value.
 * - First surrogate without a partner: c becomes UTF_ERROR_VALUE only when strict is
 *   true; otherwise the surrogate value stays in c.
 * - Anything else: c becomes UTF_ERROR_VALUE when strict is true and c fails
 *   UTF_IS_UNICODE_CHAR. */
00632 #define UTF16_NEXT_CHAR_SAFE(s, i, length, c, strict) { \
00633     (c)=(s)[(i)++]; \
00634     if(UTF_IS_FIRST_SURROGATE(c)) { \
00635         uint16_t __c2; \
00636         if((i)<(length) && UTF_IS_SECOND_SURROGATE(__c2=(s)[(i)])) { \
00637             ++(i); \
00638             (c)=UTF16_GET_PAIR_VALUE((c), __c2); \
00639             /* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() and UTF_IS_UNICODE_CHAR() */ \
00640         } else if(strict) {\
00641             /* unmatched first surrogate */ \
00642             (c)=UTF_ERROR_VALUE; \
00643         } \
00644     } else if((strict) && !UTF_IS_UNICODE_CHAR(c)) { \
00645         /* unmatched second surrogate or other non-character */ \
00646         (c)=UTF_ERROR_VALUE; \
00647     } \
00648 }
00649 
/* Appends c at s[i], post-incrementing i per unit written.
 * - c<=0xffff: stored as one unit; this path does not compare i with length.
 * - c<=0x10ffff: stored as a surrogate pair if i+1<length, otherwise a single
 *   UTF_ERROR_VALUE unit is stored.
 * - larger c: a single UTF_ERROR_VALUE unit is stored. */
00651 #define UTF16_APPEND_CHAR_SAFE(s, i, length, c) { \
00652     if((uint32_t)(c)<=0xffff) { \
00653         (s)[(i)++]=(uint16_t)(c); \
00654     } else if((uint32_t)(c)<=0x10ffff) { \
00655         if((i)+1<(length)) { \
00656             (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \
00657             (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \
00658         } else /* not enough space */ { \
00659             (s)[(i)++]=UTF_ERROR_VALUE; \
00660         } \
00661     } else /* c>0x10ffff, write error value */ { \
00662         (s)[(i)++]=UTF_ERROR_VALUE; \
00663     } \
00664 }
00665 
/* The three macros below forward their arguments unchanged to U16_FWD_1, U16_FWD_N and
 * U16_SET_CP_START. Those are not defined in this header; presumably they come from
 * the utf.h include at the top -- confirm. */
00667 #define UTF16_FWD_1_SAFE(s, i, length) U16_FWD_1(s, i, length)
00668 
00670 #define UTF16_FWD_N_SAFE(s, i, length, n) U16_FWD_N(s, i, length, n)
00671 
00673 #define UTF16_SET_CHAR_START_SAFE(s, start, i) U16_SET_CP_START(s, start, i)
00674 
/* Pre-decrements i and reads s[i] into c; if c is a second surrogate, i is decremented
 * again and the unit found there is combined with c, without checking it or the bounds. */
00676 #define UTF16_PREV_CHAR_UNSAFE(s, i, c) { \
00677     (c)=(s)[--(i)]; \
00678     if(UTF_IS_SECOND_SURROGATE(c)) { \
00679         (c)=UTF16_GET_PAIR_VALUE((s)[--(i)], (c)); \
00680     } \
00681 }
00682 
/* Decrements i by one unit, and by a second one if the unit now at i is a second
 * surrogate. The preceding unit is not inspected and no lower bound is checked. */
00684 #define UTF16_BACK_1_UNSAFE(s, i) { \
00685     if(UTF_IS_SECOND_SURROGATE((s)[--(i)])) { \
00686         --(i); \
00687     } \
00688 }
00689 
/* Applies UTF16_BACK_1_UNSAFE to (s, i) n times. n is evaluated once, into a local
 * int32_t counter; a count of zero or less does nothing. */
00691 #define UTF16_BACK_N_UNSAFE(s, i, n) { \
00692     int32_t __N; \
00693     for(__N=(n); __N>0; --__N) { \
00694         UTF16_BACK_1_UNSAFE(s, i); \
00695     } \
00696 }
00698 
/* Increments i once if the unit just before it, s[i-1], is a first surrogate.
 * s[i] itself is not inspected and no bound is checked. */
00700 #define UTF16_SET_CHAR_LIMIT_UNSAFE(s, i) { \
00701     if(UTF_IS_FIRST_SURROGATE((s)[(i)-1])) { \
00702         ++(i); \
00703     } \
00704 }
00705 
/* Pre-decrements i and reads s[i] into c.
 * - Second surrogate preceded (with i>start) by a first surrogate: i is decremented
 *   again and c becomes the pair value.
 * - Second surrogate without a partner: c becomes UTF_ERROR_VALUE only when strict is
 *   true; otherwise the surrogate value stays in c.
 * - Anything else: c becomes UTF_ERROR_VALUE when strict is true and c fails
 *   UTF_IS_UNICODE_CHAR. */
00707 #define UTF16_PREV_CHAR_SAFE(s, start, i, c, strict) { \
00708     (c)=(s)[--(i)]; \
00709     if(UTF_IS_SECOND_SURROGATE(c)) { \
00710         uint16_t __c2; \
00711         if((i)>(start) && UTF_IS_FIRST_SURROGATE(__c2=(s)[(i)-1])) { \
00712             --(i); \
00713             (c)=UTF16_GET_PAIR_VALUE(__c2, (c)); \
00714             /* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() and UTF_IS_UNICODE_CHAR() */ \
00715         } else if(strict) {\
00716             /* unmatched second surrogate */ \
00717             (c)=UTF_ERROR_VALUE; \
00718         } \
00719     } else if((strict) && !UTF_IS_UNICODE_CHAR(c)) { \
00720         /* unmatched first surrogate or other non-character */ \
00721         (c)=UTF_ERROR_VALUE; \
00722     } \
00723 }
00724 
/* The three macros below forward their arguments unchanged to U16_BACK_1, U16_BACK_N
 * and U16_SET_CP_LIMIT. Those are not defined in this header; presumably they come
 * from the utf.h include at the top -- confirm. */
00726 #define UTF16_BACK_1_SAFE(s, start, i) U16_BACK_1(s, start, i)
00727 
00729 #define UTF16_BACK_N_SAFE(s, start, i, n) U16_BACK_N(s, start, i, n)
00730 
00732 #define UTF16_SET_CHAR_LIMIT_SAFE(s, start, i, length) U16_SET_CP_LIMIT(s, start, i, length)
00733 
00734 /* Formerly utf32.h --------------------------------------------------------- */
00735 
00736 /*
00737 * Old documentation:
00738 *
00739 *   This file defines macros to deal with UTF-32 code units and code points.
00740 *   Signatures and semantics are the same as for the similarly named macros
00741 *   in utf16.h.
00742 *   utf32.h is included by utf.h after unicode/umachine.h
00743 *   and some common definitions.
00744 *   <p><b>Usage:</b>  ICU coding guidelines for if() statements should be followed when using these macros.
00745 *                  Compound statements (curly braces {}) must be used  for if-else-while...
00746 *                  bodies and all macro statements should be terminated with semicolon.</p>
00747 */
00748 
00749 /* internal definitions ----------------------------------------------------- */
00750 
/* Validity test used by the UTF-32 "safe" macros: with strict false, c only has to be
 * <=0x10ffff (compared as uint32_t); with strict true, c must pass UTF_IS_UNICODE_CHAR. */
00752 #define UTF32_IS_SAFE(c, strict) \
00753     (!(strict) ? \
00754         (uint32_t)(c)<=0x10ffff : \
00755         UTF_IS_UNICODE_CHAR(c))
00756 
00757 /*
00758  * For the semantics of all of these macros, see utf16.h.
00759  * The UTF-32 versions are trivial because any code point is
00760  * encoded using exactly one code unit.
00761  */
00762 
00763 /* single-code point definitions -------------------------------------------- */
00764 
00765 /* classes of code unit values */
00766 
/* Constant answers: the arguments of the function-like macros below are ignored, and
 * every unit is reported as single, never lead or trail. */
00768 #define UTF32_IS_SINGLE(uchar) 1
00769 
00770 #define UTF32_IS_LEAD(uchar) 0
00771 
00772 #define UTF32_IS_TRAIL(uchar) 0
00773 
00774 /* number of code units per code point */
00775 
/* Constant answers as well: always one unit, never multiple. */
00777 #define UTF32_NEED_MULTIPLE_UCHAR(c) 0
00778 
00779 #define UTF32_CHAR_LENGTH(c) 1
00780 
00781 #define UTF32_MAX_CHAR_LENGTH 1
00782 
00783 /* average number of code units compared to UTF-16 */
00784 
/* Identity: yields size unchanged. */
00786 #define UTF32_ARRAY_SIZE(size) (size)
00787 
/* c = s[i]; nothing else is touched. */
00789 #define UTF32_GET_CHAR_UNSAFE(s, i, c) { \
00790     (c)=(s)[i]; \
00791 }
00792 
/* c = s[i], replaced by UTF_ERROR_VALUE if it fails UTF32_IS_SAFE(c, strict).
 * The start and length parameters are accepted but not used. */
00794 #define UTF32_GET_CHAR_SAFE(s, start, i, length, c, strict) { \
00795     (c)=(s)[i]; \
00796     if(!UTF32_IS_SAFE(c, strict)) { \
00797         (c)=UTF_ERROR_VALUE; \
00798     } \
00799 }
00800 
00801 /* definitions with forward iteration --------------------------------------- */
00802 
/* c = s[i], then i is incremented. */
00804 #define UTF32_NEXT_CHAR_UNSAFE(s, i, c) { \
00805     (c)=(s)[(i)++]; \
00806 }
00807 
/* s[i] = c, then i is incremented. No capacity check or validation. */
00809 #define UTF32_APPEND_CHAR_UNSAFE(s, i, c) { \
00810     (s)[(i)++]=(c); \
00811 }
00812 
/* Increments i; s is not used. */
00814 #define UTF32_FWD_1_UNSAFE(s, i) { \
00815     ++(i); \
00816 }
00817 
/* Adds n to i; s is not used. */
00819 #define UTF32_FWD_N_UNSAFE(s, i, n) { \
00820     (i)+=(n); \
00821 }
00822 
/* Expands to an empty block; neither argument is used. */
00824 #define UTF32_SET_CHAR_START_UNSAFE(s, i) { \
00825 }
00826 
/* c = s[i], then i is incremented; c is replaced by UTF_ERROR_VALUE if it fails
 * UTF32_IS_SAFE(c, strict). The length parameter is accepted but not used. */
00828 #define UTF32_NEXT_CHAR_SAFE(s, i, length, c, strict) { \
00829     (c)=(s)[(i)++]; \
00830     if(!UTF32_IS_SAFE(c, strict)) { \
00831         (c)=UTF_ERROR_VALUE; \
00832     } \
00833 }
00834 
/* Stores c at s[i] if c<=0x10ffff (as uint32_t), otherwise stores 0xfffd; i is
 * incremented either way. The length parameter is accepted but not used. */
00836 #define UTF32_APPEND_CHAR_SAFE(s, i, length, c) { \
00837     if((uint32_t)(c)<=0x10ffff) { \
00838         (s)[(i)++]=(c); \
00839     } else /* c>0x10ffff, write 0xfffd */ { \
00840         (s)[(i)++]=0xfffd; \
00841     } \
00842 }
00843 
/* Increments i. The s and length parameters are accepted but not used, so i is not
 * limited to length here. */
00845 #define UTF32_FWD_1_SAFE(s, i, length) { \
00846     ++(i); \
00847 }
00848 
/* Adds n to i and clamps the result to length if it went beyond it. */
00850 #define UTF32_FWD_N_SAFE(s, i, length, n) { \
00851     if(((i)+=(n))>(length)) { \
00852         (i)=(length); \
00853     } \
00854 }
00855 
/* Expands to an empty block; no argument is used. */
00857 #define UTF32_SET_CHAR_START_SAFE(s, start, i) { \
00858 }
00859 
00860 /* definitions with backward iteration -------------------------------------- */
00861 
/* i is decremented, then c = s[i]. */
00863 #define UTF32_PREV_CHAR_UNSAFE(s, i, c) { \
00864     (c)=(s)[--(i)]; \
00865 }
00866 
/* Decrements i; s is not used. */
00868 #define UTF32_BACK_1_UNSAFE(s, i) { \
00869     --(i); \
00870 }
00871 
/* Subtracts n from i; s is not used. */
00873 #define UTF32_BACK_N_UNSAFE(s, i, n) { \
00874     (i)-=(n); \
00875 }
00876 
/* Expands to an empty block; neither argument is used. */
00878 #define UTF32_SET_CHAR_LIMIT_UNSAFE(s, i) { \
00879 }
00880 
/* i is decremented, then c = s[i]; c is replaced by UTF_ERROR_VALUE if it fails
 * UTF32_IS_SAFE(c, strict). The start parameter is accepted but not used. */
00882 #define UTF32_PREV_CHAR_SAFE(s, start, i, c, strict) { \
00883     (c)=(s)[--(i)]; \
00884     if(!UTF32_IS_SAFE(c, strict)) { \
00885         (c)=UTF_ERROR_VALUE; \
00886     } \
00887 }
00888 
/* Decrements i. The s and start parameters are accepted but not used, so i is not
 * limited to start here. */
00890 #define UTF32_BACK_1_SAFE(s, start, i) { \
00891     --(i); \
00892 }
00893 
/* Subtracts n from i and clamps the result to start if it went below it. */
00895 #define UTF32_BACK_N_SAFE(s, start, i, n) { \
00896     (i)-=(n); \
00897     if((i)<(start)) { \
00898         (i)=(start); \
00899     } \
00900 }
00901 
/* Expands to an empty block. NOTE(review): this macro takes (s, i, length), whereas
 * UTF8_SET_CHAR_LIMIT_SAFE and UTF16_SET_CHAR_LIMIT_SAFE in this header take
 * (s, start, i, length); code written against the 4-argument form will not
 * preprocess with this one. Left unchanged to keep the existing interface. */
00903 #define UTF32_SET_CHAR_LIMIT_SAFE(s, i, length) { \
00904 }
00905 
00906 /* Formerly utf.h, part 2 --------------------------------------------------- */
00907 
/* Encoding-neutral names for the explicit _SAFE / _UNSAFE macro families.
 * Every macro in this group is a pure alias that forwards its arguments, unchanged
 * and in the same order, to the UTF16_ macro of the same name. */
00913 #define UTF_ARRAY_SIZE(size) UTF16_ARRAY_SIZE(size)
00914 
00916 #define UTF_GET_CHAR_UNSAFE(s, i, c)                 UTF16_GET_CHAR_UNSAFE(s, i, c)
00917 
00919 #define UTF_GET_CHAR_SAFE(s, start, i, length, c, strict) UTF16_GET_CHAR_SAFE(s, start, i, length, c, strict)
00920 
00921 
00923 #define UTF_NEXT_CHAR_UNSAFE(s, i, c)                UTF16_NEXT_CHAR_UNSAFE(s, i, c)
00924 
00926 #define UTF_NEXT_CHAR_SAFE(s, i, length, c, strict)  UTF16_NEXT_CHAR_SAFE(s, i, length, c, strict)
00927 
00928 
00930 #define UTF_APPEND_CHAR_UNSAFE(s, i, c)              UTF16_APPEND_CHAR_UNSAFE(s, i, c)
00931 
00933 #define UTF_APPEND_CHAR_SAFE(s, i, length, c)        UTF16_APPEND_CHAR_SAFE(s, i, length, c)
00934 
00935 
00937 #define UTF_FWD_1_UNSAFE(s, i)                       UTF16_FWD_1_UNSAFE(s, i)
00938 
00940 #define UTF_FWD_1_SAFE(s, i, length)                 UTF16_FWD_1_SAFE(s, i, length)
00941 
00942 
00944 #define UTF_FWD_N_UNSAFE(s, i, n)                    UTF16_FWD_N_UNSAFE(s, i, n)
00945 
00947 #define UTF_FWD_N_SAFE(s, i, length, n)              UTF16_FWD_N_SAFE(s, i, length, n)
00948 
00949 
00951 #define UTF_SET_CHAR_START_UNSAFE(s, i)              UTF16_SET_CHAR_START_UNSAFE(s, i)
00952 
00954 #define UTF_SET_CHAR_START_SAFE(s, start, i)         UTF16_SET_CHAR_START_SAFE(s, start, i)
00955 
00956 
00958 #define UTF_PREV_CHAR_UNSAFE(s, i, c)                UTF16_PREV_CHAR_UNSAFE(s, i, c)
00959 
00961 #define UTF_PREV_CHAR_SAFE(s, start, i, c, strict)   UTF16_PREV_CHAR_SAFE(s, start, i, c, strict)
00962 
00963 
00965 #define UTF_BACK_1_UNSAFE(s, i)                      UTF16_BACK_1_UNSAFE(s, i)
00966 
00968 #define UTF_BACK_1_SAFE(s, start, i)                 UTF16_BACK_1_SAFE(s, start, i)
00969 
00970 
00972 #define UTF_BACK_N_UNSAFE(s, i, n)                   UTF16_BACK_N_UNSAFE(s, i, n)
00973 
00975 #define UTF_BACK_N_SAFE(s, start, i, n)              UTF16_BACK_N_SAFE(s, start, i, n)
00976 
00977 
00979 #define UTF_SET_CHAR_LIMIT_UNSAFE(s, i)              UTF16_SET_CHAR_LIMIT_UNSAFE(s, i)
00980 
00982 #define UTF_SET_CHAR_LIMIT_SAFE(s, start, i, length) UTF16_SET_CHAR_LIMIT_SAFE(s, start, i, length)
00983 
00984 /* Define default macros (UTF-16 "safe") ------------------------------------ */
00985 
/* Default (unsuffixed) macro names. Each one forwards its arguments unchanged to a
 * U16_ macro, which this header does not define (presumably supplied through the
 * utf.h include at the top -- confirm). The two exceptions forward to macros defined
 * in this header: UTF_NEED_MULTIPLE_UCHAR -> UTF16_NEED_MULTIPLE_UCHAR and
 * UTF_APPEND_CHAR -> UTF16_APPEND_CHAR_SAFE. */
00991 #define UTF_IS_SINGLE(uchar) U16_IS_SINGLE(uchar)
00992 
00998 #define UTF_IS_LEAD(uchar) U16_IS_LEAD(uchar)
00999 
01005 #define UTF_IS_TRAIL(uchar) U16_IS_TRAIL(uchar)
01006 
01012 #define UTF_NEED_MULTIPLE_UCHAR(c) UTF16_NEED_MULTIPLE_UCHAR(c)
01013 
01019 #define UTF_CHAR_LENGTH(c) U16_LENGTH(c)
01020 
01026 #define UTF_MAX_CHAR_LENGTH U16_MAX_LENGTH
01027 
01037 #define UTF_GET_CHAR(s, start, i, length, c) U16_GET(s, start, i, length, c)
01038 
01050 #define UTF_NEXT_CHAR(s, i, length, c) U16_NEXT(s, i, length, c)
01051 
01063 #define UTF_APPEND_CHAR(s, i, length, c) UTF16_APPEND_CHAR_SAFE(s, i, length, c)
01064 
01074 #define UTF_FWD_1(s, i, length) U16_FWD_1(s, i, length)
01075 
01085 #define UTF_FWD_N(s, i, length, n) U16_FWD_N(s, i, length, n)
01086 
01101 #define UTF_SET_CHAR_START(s, start, i) U16_SET_CP_START(s, start, i)
01102 
01114 #define UTF_PREV_CHAR(s, start, i, c) U16_PREV(s, start, i, c)
01115 
01127 #define UTF_BACK_1(s, start, i) U16_BACK_1(s, start, i)
01128 
01140 #define UTF_BACK_N(s, start, i, n) U16_BACK_N(s, start, i, n)
01141 
01156 #define UTF_SET_CHAR_LIMIT(s, start, i, length) U16_SET_CP_LIMIT(s, start, i, length)
01157 
01158 #endif

Generated on Wed Dec 18 16:50:07 2002 for ICU 2.4 by doxygen1.2.11.1 written by Dimitri van Heesch, © 1997-2001