/*
*******************************************************************************
*
*   Copyright (C) 1999-2000, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*   file name:  utf32.h
*   encoding:   US-ASCII
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 1999sep20
*   created by: Markus W. Scherer
*
*   This file defines macros to deal with UTF-32 code units and code points.
*   Signatures and semantics are the same as for the similarly named macros
*   in utf16.h.
*   utf32.h is included by utf.h after unicode/umachine.h
*   and some common definitions.
*
*   Names used here but not defined in this file: uint32_t,
*   UTF_IS_SURROGATE() and UTF_ERROR_VALUE. They are presumably among the
*   "common definitions" supplied before this header is included -- confirm
*   against utf.h before including this file directly.
*
*   All statement-like macros below expand to a bare compound statement
*   "{ ... }", not to "do { ... } while(0)". Writing one of them followed by
*   a semicolon as the unbraced body of an "if" that has an "else" therefore
*   does not compile; brace such call sites.
*/

#ifndef __UTF32_H__
#define __UTF32_H__

/* internal definitions ----------------------------------------------------- */

/*
 * Evaluates to non-zero if c may be accepted as a code point.
 *   - c must be <= 0x10ffff when viewed as uint32_t
 *     (so negative signed values are rejected as well).
 *   - If strict is non-zero, c must additionally not be a surrogate
 *     (as decided by UTF_IS_SURROGATE) and its low 16 bits must not be
 *     0xfffe or 0xffff.
 * c is evaluated more than once; do not pass an expression with side effects.
 */
#define UTF32_IS_SAFE(c, strict) \
    ((uint32_t)(c)<=0x10ffff && \
     (!(strict) || (!UTF_IS_SURROGATE(c) && ((c)&0xfffe)!=0xfffe)))

/*
 * For the semantics of all of these macros, see utf16.h.
 * The UTF-32 versions are trivial because any code point is
 * encoded using exactly one code unit.
 */

/* single-code point definitions -------------------------------------------- */

/* classes of code unit values: every UTF-32 code unit is a whole code point */
#define UTF32_IS_SINGLE(uchar) 1
#define UTF32_IS_LEAD(uchar) 0
#define UTF32_IS_TRAIL(uchar) 0

/* number of code units per code point: always exactly one */
#define UTF32_NEED_MULTIPLE_UCHAR(c) 0
#define UTF32_CHAR_LENGTH(c) 1
#define UTF32_MAX_CHAR_LENGTH 1

/* average number of code units compared to UTF-16 */
#define UTF32_ARRAY_SIZE(size) (size)

/* Read the code unit at s[i] into c; i is not modified, nothing is checked. */
#define UTF32_GET_CHAR_UNSAFE(s, i, c) { \
    (c)=(s)[(i)]; \
}

/*
 * Read the code unit at s[i] into c and replace it with UTF_ERROR_VALUE
 * if UTF32_IS_SAFE(c, strict) rejects it.
 * start and length are accepted for signature compatibility but are not
 * used: the index itself is NOT range-checked here.
 */
#define UTF32_GET_CHAR_SAFE(s, start, i, length, c, strict) { \
    (c)=(s)[(i)]; \
    if(!UTF32_IS_SAFE(c, strict)) { \
        (c)=UTF_ERROR_VALUE; \
    } \
}

/* definitions with forward iteration --------------------------------------- */

/* Read s[i] into c and post-increment i. */
#define UTF32_NEXT_CHAR_UNSAFE(s, i, c) { \
    (c)=(s)[(i)++]; \
}

/* Store c at s[i] and post-increment i; c is not validated. */
#define UTF32_APPEND_CHAR_UNSAFE(s, i, c) { \
    (s)[(i)++]=(c); \
}

/* Advance i by one code unit (== one code point). */
#define UTF32_FWD_1_UNSAFE(s, i) { \
    ++(i); \
}

/* Advance i by n code units; no limit is applied. */
#define UTF32_FWD_N_UNSAFE(s, i, n) { \
    (i)+=(n); \
}

/* No-op: every index is already at the start of a code point. */
#define UTF32_SET_CHAR_START_UNSAFE(s, i) { \
}

/*
 * Read s[i] into c, post-increment i, and replace c with UTF_ERROR_VALUE
 * if UTF32_IS_SAFE(c, strict) rejects it.
 * length is not used: i is NOT compared against it before the read.
 */
#define UTF32_NEXT_CHAR_SAFE(s, i, length, c, strict) { \
    (c)=(s)[(i)++]; \
    if(!UTF32_IS_SAFE(c, strict)) { \
        (c)=UTF_ERROR_VALUE; \
    } \
}

/*
 * Store c at s[i] and post-increment i; a value above 0x10ffff
 * (as uint32_t) is replaced by 0xfffd.
 * length is not used: i is NOT compared against it before the write.
 */
#define UTF32_APPEND_CHAR_SAFE(s, i, length, c) { \
    if((uint32_t)(c)<=0x10ffff) { \
        (s)[(i)++]=(c); \
    } else /* c>0x10ffff, write 0xfffd */ { \
        (s)[(i)++]=0xfffd; \
    } \
}

/* Advance i by one code unit; length is not used and i is not clamped. */
#define UTF32_FWD_1_SAFE(s, i, length) { \
    ++(i); \
}

/* Advance i by n code units, clamping the result to length. */
#define UTF32_FWD_N_SAFE(s, i, length, n) { \
    if(((i)+=(n))>(length)) { \
        (i)=(length); \
    } \
}

/* No-op: every index is already at the start of a code point. */
#define UTF32_SET_CHAR_START_SAFE(s, start, i) { \
}

/* definitions with backward iteration -------------------------------------- */

/* Pre-decrement i and read s[i] into c. */
#define UTF32_PREV_CHAR_UNSAFE(s, i, c) { \
    (c)=(s)[--(i)]; \
}

/* Move i back by one code unit (== one code point). */
#define UTF32_BACK_1_UNSAFE(s, i) { \
    --(i); \
}

/* Move i back by n code units; no limit is applied. */
#define UTF32_BACK_N_UNSAFE(s, i, n) { \
    (i)-=(n); \
}

/* No-op: every index is already a valid code point limit. */
#define UTF32_SET_CHAR_LIMIT_UNSAFE(s, i) { \
}

/*
 * Pre-decrement i, read s[i] into c, and replace c with UTF_ERROR_VALUE
 * if UTF32_IS_SAFE(c, strict) rejects it.
 * start is not used: i is NOT compared against it before the read.
 */
#define UTF32_PREV_CHAR_SAFE(s, start, i, c, strict) { \
    (c)=(s)[--(i)]; \
    if(!UTF32_IS_SAFE(c, strict)) { \
        (c)=UTF_ERROR_VALUE; \
    } \
}

/* Move i back by one code unit; start is not used and i is not clamped. */
#define UTF32_BACK_1_SAFE(s, start, i) { \
    --(i); \
}

/*
 * Move i back by n code units, clamping the result to start.
 * The clamp is applied after the subtraction, so it only works when i can
 * hold a value below start (i.e. i has a signed type); with an unsigned i
 * the subtraction wraps around instead.
 */
#define UTF32_BACK_N_SAFE(s, start, i, n) { \
    (i)-=(n); \
    if((i)<(start)) { \
        (i)=(start); \
    } \
}

/* No-op: every index is already a valid code point limit. */
#define UTF32_SET_CHAR_LIMIT_SAFE(s, i, length) { \
}

#endif