00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00029 #ifndef __UTF16_H__
00030 #define __UTF16_H__
00031
00032
00033
00034
/**
 * Is this code unit the first half of a surrogate pair?
 * True when the value masked with 0xfffffc00 equals 0xd800,
 * i.e. for 0xd800..0xdbff.
 */
#define UTF_IS_FIRST_SURROGATE(uchar) (0xd800==((uchar)&0xfffffc00))

/**
 * Is this code unit the second half of a surrogate pair?
 * True when the value masked with 0xfffffc00 equals 0xdc00,
 * i.e. for 0xdc00..0xdfff.
 */
#define UTF_IS_SECOND_SURROGATE(uchar) (0xdc00==((uchar)&0xfffffc00))

/**
 * For a value c that is already known to be a surrogate:
 * is it the first one of a pair? Only bit 0x400 is examined, so the
 * result is meaningless for values that are not surrogates.
 */
#define UTF_IS_SURROGATE_FIRST(c) (0==((c)&0x400))
00039
00040
/**
 * Constant part of the surrogate-pair computation, folded into one value:
 * (0xd800<<10) + 0xdc00 - 0x10000.
 * Subtracting it removes the 0xd800/0xdc00 bases of both surrogates and
 * adds the 0x10000 offset in a single step.
 */
#define UTF_SURROGATE_OFFSET ((0xd800<<10)+0xdc00-0x10000)

/**
 * Combine a first and a second surrogate into one code point value.
 * Neither argument is checked; both are evaluated exactly once.
 */
#define UTF16_GET_PAIR_VALUE(first, second) \
    ((((first)<<10)+(second))-UTF_SURROGATE_OFFSET)
00045
00046
/**
 * Classification of a single UTF-16 code unit.
 * UTF_IS_SURROGATE is not defined in this header and must be supplied by
 * whatever includes it.
 */

/** Does this code unit stand alone, i.e. is it not a surrogate? */
#define UTF16_IS_SINGLE(uchar) (!UTF_IS_SURROGATE(uchar))

/** Is this code unit the first one of a surrogate pair? */
#define UTF16_IS_LEAD(uchar) (UTF_IS_FIRST_SURROGATE(uchar))

/** Is this code unit the second one of a surrogate pair? */
#define UTF16_IS_TRAIL(uchar) (UTF_IS_SECOND_SURROGATE(uchar))
00050
00051
/** Does code point c need more than one 16-bit unit (is it above 0xffff)? */
#define UTF16_NEED_MULTIPLE_UCHAR(c) (0xffff<(uint32_t)(c))

/**
 * Number of 16-bit units used for code point c: 1 up to 0xffff, else 2.
 * The value is compared as uint32_t, so negative inputs count as 2.
 */
#define UTF16_CHAR_LENGTH(c) ((uint32_t)(c)>0xffff ? 2 : 1)

/** Largest value that UTF16_CHAR_LENGTH can yield. */
#define UTF16_MAX_CHAR_LENGTH 2

/** Array size for the given size; this is the identity for UTF-16 here. */
#define UTF16_ARRAY_SIZE(size) (size)
00058
00059
00060
00061
00062
00063
00064
00065
00066
00067
00068
/**
 * Read into c the code point that contains the code unit s[i];
 * i itself is not changed.
 * If s[i] is a surrogate, the neighbouring unit (s[i+1] after a first
 * surrogate, s[i-1] before a second one) is read without any bounds or
 * validity check.
 */
#define UTF16_GET_CHAR_UNSAFE(s, i, c) { \
    (c)=(s)[i]; \
    if(UTF_IS_SURROGATE(c)) { \
        /* bit 0x400 distinguishes a first surrogate from a second one */ \
        (c)=UTF_IS_SURROGATE_FIRST(c) ? \
            UTF16_GET_PAIR_VALUE((c), (s)[(i)+1]) : \
            UTF16_GET_PAIR_VALUE((s)[(i)-1], (c)); \
    } \
}
00079
/**
 * Read into c the code point that contains the code unit s[i];
 * i itself is not changed.
 * A neighbouring unit is only read while it stays inside [start, length).
 * If s[i] is a surrogate without a matching partner, c keeps the single
 * surrogate value, unless strict is true, in which case c is set to
 * UTF_ERROR_VALUE.
 */
#define UTF16_GET_CHAR_SAFE(s, start, i, length, c, strict) { \
    (c)=(s)[i]; \
    if(UTF_IS_SURROGATE(c)) { \
        uint16_t u16Other_; \
        if(!UTF_IS_SURROGATE_FIRST(c)) { \
            /* second surrogate: its partner must sit just before it */ \
            if((i)>(start) && UTF_IS_FIRST_SURROGATE(u16Other_=(s)[(i)-1])) { \
                (c)=UTF16_GET_PAIR_VALUE(u16Other_, (c)); \
            } else if(strict) { \
                (c)=UTF_ERROR_VALUE; \
            } \
        } else { \
            /* first surrogate: its partner must follow directly */ \
            if((i)+1<(length) && UTF_IS_SECOND_SURROGATE(u16Other_=(s)[(i)+1])) { \
                (c)=UTF16_GET_PAIR_VALUE((c), u16Other_); \
            } else if(strict) { \
                (c)=UTF_ERROR_VALUE; \
            } \
        } \
    } \
}
00104
00105
00106
00107
00108
00109
00110
00111
00112
00113
00114
00115
00116
00117
00118
00119
/**
 * Read the code point starting at s[i] into c and advance i past it.
 * After a first surrogate the next unit is consumed as its partner
 * without checking bounds or whether it really is a second surrogate.
 */
#define UTF16_NEXT_CHAR_UNSAFE(s, i, c) { \
    (c)=(s)[i]; \
    ++(i); \
    if(UTF_IS_FIRST_SURROGATE(c)) { \
        (c)=UTF16_GET_PAIR_VALUE((c), (s)[i]); \
        ++(i); \
    } \
}
00126
/**
 * Write code point c at s[i] and advance i past what was written.
 * Values above 0xffff (compared as uint32_t) are written as two units,
 * everything else as one. There is no bounds check and no validation of c.
 */
#define UTF16_APPEND_CHAR_UNSAFE(s, i, c) { \
    if((uint32_t)(c)>0xffff) { \
        /* 0xd7c0 == 0xd800 - (0x10000>>10) */ \
        (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \
        (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \
    } else { \
        (s)[(i)++]=(uint16_t)(c); \
    } \
}
00135
/**
 * Advance i by one code point: by two units if s[i] is a first surrogate,
 * by one unit otherwise. The unit after a first surrogate is not examined.
 */
#define UTF16_FWD_1_UNSAFE(s, i) { \
    (i)+=UTF_IS_FIRST_SURROGATE((s)[i]) ? 2 : 1; \
}
00141
/**
 * Advance i by n code points by applying UTF16_FWD_1_UNSAFE n times.
 * n is evaluated once; nothing happens when it is not positive.
 */
#define UTF16_FWD_N_UNSAFE(s, i, n) { \
    UTextOffset u16Left_; \
    for(u16Left_=(n); u16Left_>0; --u16Left_) { \
        UTF16_FWD_1_UNSAFE(s, i); \
    } \
}
00149
00150
00151
00152
00153
00154
00155
00156
00157
00158
/**
 * Move i back to the start of the code point it points into:
 * if s[i] is a second surrogate, i is decremented by one.
 * The preceding unit is not examined.
 */
#define UTF16_SET_CHAR_START_UNSAFE(s, i) { \
    if(UTF_IS_SECOND_SURROGATE((s)[i])) { \
        (i)-=1; \
    } \
}
00164
00165
00166
/**
 * Read the code point starting at s[i] into c and advance i past it.
 *
 * s[i] is always read, so the caller must ensure that i is a valid index.
 * After a first surrogate, the following unit is only read if i<length,
 * and only consumed if it is a second surrogate.
 * For an unpaired surrogate, c keeps the single surrogate value unless
 * strict is true, in which case c is set to UTF_ERROR_VALUE.
 *
 * strict may be any expression: it is parenthesized where it is combined
 * with other operators, so that e.g. "a || b" is evaluated as a whole.
 */
#define UTF16_NEXT_CHAR_SAFE(s, i, length, c, strict) { \
    (c)=(s)[(i)++]; \
    if(UTF_IS_FIRST_SURROGATE(c)) { \
        uint16_t u16Trail_; \
        if((i)<(length) && UTF_IS_SECOND_SURROGATE(u16Trail_=(s)[(i)])) { \
            ++(i); \
            (c)=UTF16_GET_PAIR_VALUE((c), u16Trail_); \
        } else if(strict) { \
            /* first surrogate without a partner */ \
            (c)=UTF_ERROR_VALUE; \
        } \
    } else if((strict) && UTF_IS_SECOND_SURROGATE(c)) { \
        /* second surrogate that was not preceded by a first one */ \
        (c)=UTF_ERROR_VALUE; \
    } \
}
00185
/**
 * Write code point c at s[i] and advance i past what was written.
 *  - c<=0xffff: one unit is written.
 *  - c<=0x10ffff and two units fit (i+1<length): a surrogate pair is written.
 *  - otherwise: the single unit UTF_ERROR_VALUE is written.
 * c is compared as uint32_t. The single-unit writes are not bounds-checked
 * by this macro, so the caller has to guarantee room for one unit.
 */
#define UTF16_APPEND_CHAR_SAFE(s, i, length, c) { \
    if((uint32_t)(c)<=0xffff) { \
        (s)[(i)++]=(uint16_t)(c); \
    } else if((uint32_t)(c)<=0x10ffff && (i)+1<(length)) { \
        /* 0xd7c0 == 0xd800 - (0x10000>>10) */ \
        (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \
        (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \
    } else { \
        /* out of range, or no room for a pair */ \
        (s)[(i)++]=UTF_ERROR_VALUE; \
    } \
}
00200
/**
 * Advance i by one code point. s[i] is always read and skipped; a second
 * unit is skipped only if the first was a first surrogate, i is still
 * below length, and the next unit is a second surrogate.
 */
#define UTF16_FWD_1_SAFE(s, i, length) { \
    if(UTF_IS_FIRST_SURROGATE((s)[(i)++])) { \
        if((i)<(length) && UTF_IS_SECOND_SURROGATE((s)[i])) { \
            ++(i); \
        } \
    } \
}
00206
/**
 * Advance i by up to n code points with UTF16_FWD_1_SAFE, stopping early
 * once i reaches length. n is evaluated once.
 */
#define UTF16_FWD_N_SAFE(s, i, length, n) { \
    UTextOffset u16Left_; \
    for(u16Left_=(n); u16Left_>0 && (i)<(length); --u16Left_) { \
        UTF16_FWD_1_SAFE(s, i, length); \
    } \
}
00214
/**
 * Move i back to the start of the code point it points into.
 * i is decremented only if s[i] is a second surrogate, i is above start,
 * and s[i-1] is a first surrogate.
 */
#define UTF16_SET_CHAR_START_SAFE(s, start, i) { \
    if(UTF_IS_SECOND_SURROGATE((s)[i])) { \
        if((i)>(start) && UTF_IS_FIRST_SURROGATE((s)[(i)-1])) { \
            --(i); \
        } \
    } \
}
00220
00221
00222
00223
00224
00225
00226
00227
00228
00229
00230
00231
00232
00233
00234
00235
00236
/**
 * Move i back by one code point and read that code point into c.
 * Before a second surrogate the preceding unit is consumed as its partner
 * without checking bounds or whether it really is a first surrogate.
 */
#define UTF16_PREV_CHAR_UNSAFE(s, i, c) { \
    --(i); \
    (c)=(s)[i]; \
    if(UTF_IS_SECOND_SURROGATE(c)) { \
        --(i); \
        (c)=UTF16_GET_PAIR_VALUE((s)[i], (c)); \
    } \
}
00243
/**
 * Move i back by one code point: by two units if the unit just before i
 * is a second surrogate, by one unit otherwise.
 * The unit before a second surrogate is not examined.
 */
#define UTF16_BACK_1_UNSAFE(s, i) { \
    --(i); \
    if(UTF_IS_SECOND_SURROGATE((s)[i])) { \
        --(i); \
    } \
}
00249
/**
 * Move i back by n code points by applying UTF16_BACK_1_UNSAFE n times.
 * n is evaluated once; nothing happens when it is not positive.
 */
#define UTF16_BACK_N_UNSAFE(s, i, n) { \
    UTextOffset u16Left_; \
    for(u16Left_=(n); u16Left_>0; --u16Left_) { \
        UTF16_BACK_1_UNSAFE(s, i); \
    } \
}
00257
00258
00259
00260
00261
00262
00263
00264
00265
00266
/**
 * Move i forward to the end of the code point that the unit before i
 * belongs to: if s[i-1] is a first surrogate, i is incremented by one.
 * s[i] itself is not examined.
 */
#define UTF16_SET_CHAR_LIMIT_UNSAFE(s, i) { \
    if(UTF_IS_FIRST_SURROGATE((s)[(i)-1])) { \
        (i)+=1; \
    } \
}
00272
00273
00274
/**
 * Move i back by one code point and read that code point into c.
 *
 * s[i-1] is always read, so the caller must ensure that i is above start.
 * Before a second surrogate, the preceding unit is only read if i>start,
 * and only consumed if it is a first surrogate.
 * For an unpaired surrogate, c keeps the single surrogate value unless
 * strict is true, in which case c is set to UTF_ERROR_VALUE.
 *
 * strict may be any expression: it is parenthesized where it is combined
 * with other operators, so that e.g. "a || b" is evaluated as a whole.
 */
#define UTF16_PREV_CHAR_SAFE(s, start, i, c, strict) { \
    (c)=(s)[--(i)]; \
    if(UTF_IS_SECOND_SURROGATE(c)) { \
        uint16_t u16Lead_; \
        if((i)>(start) && UTF_IS_FIRST_SURROGATE(u16Lead_=(s)[(i)-1])) { \
            --(i); \
            (c)=UTF16_GET_PAIR_VALUE(u16Lead_, (c)); \
        } else if(strict) { \
            /* second surrogate without a partner before it */ \
            (c)=UTF_ERROR_VALUE; \
        } \
    } else if((strict) && UTF_IS_FIRST_SURROGATE(c)) { \
        /* first surrogate that is not followed by a second one */ \
        (c)=UTF_ERROR_VALUE; \
    } \
}
00293
/**
 * Move i back by one code point. i is always decremented once; it is
 * decremented a second time only if the unit now at i is a second
 * surrogate, i is still above start, and s[i-1] is a first surrogate.
 */
#define UTF16_BACK_1_SAFE(s, start, i) { \
    --(i); \
    if(UTF_IS_SECOND_SURROGATE((s)[i]) && (i)>(start) && UTF_IS_FIRST_SURROGATE((s)[(i)-1])) { \
        --(i); \
    } \
}
00299
/**
 * Move i back by up to n code points with UTF16_BACK_1_SAFE, stopping
 * early once i is no longer above start. n is evaluated once.
 */
#define UTF16_BACK_N_SAFE(s, start, i, n) { \
    UTextOffset u16Left_; \
    for(u16Left_=(n); u16Left_>0 && (i)>(start); --u16Left_) { \
        UTF16_BACK_1_SAFE(s, start, i); \
    } \
}
00307
/**
 * Move i forward so that it does not split a surrogate pair.
 * i is incremented only if start<i<length, s[i-1] is a first surrogate
 * and s[i] is a second surrogate; no unit is read unless the range check
 * has passed.
 */
#define UTF16_SET_CHAR_LIMIT_SAFE(s, start, i, length) { \
    if((start)<(i) && (i)<(length)) { \
        if(UTF_IS_FIRST_SURROGATE((s)[(i)-1]) && UTF_IS_SECOND_SURROGATE((s)[i])) { \
            ++(i); \
        } \
    } \
}
00313
00314 #endif