Go to the source code of this file.
Defines | |
#define | UTF_IS_FIRST_SURROGATE(uchar) (((uchar)&0xfffffc00)==0xd800) |
#define | UTF_IS_SECOND_SURROGATE(uchar) (((uchar)&0xfffffc00)==0xdc00) |
#define | UTF_IS_SURROGATE_FIRST(c) (((c)&0x400)==0) |
#define | UTF_SURROGATE_OFFSET ((0xd800<<10UL)+0xdc00-0x10000) |
Offset constant used to compute the UTF-32 value directly from a pair of surrogate code units (see UTF16_GET_PAIR_VALUE). More... | |
#define | UTF16_GET_PAIR_VALUE(first, second) (((first)<<10UL)+(second)-UTF_SURROGATE_OFFSET) |
#define | UTF_FIRST_SURROGATE(supplementary) (UChar)(((supplementary)>>10)+0xd7c0) |
Takes a supplementary code point (0x10000..0x10ffff) and computes the first surrogate (0xd800..0xdbff) for UTF-16 encoding. More... | |
#define | UTF_SECOND_SURROGATE(supplementary) (UChar)(((supplementary)&0x3ff)|0xdc00) |
Takes a supplementary code point (0x10000..0x10ffff) and computes the second surrogate (0xdc00..0xdfff) for UTF-16 encoding. More... | |
#define | UTF16_LEAD(supplementary) UTF_FIRST_SURROGATE(supplementary) |
alias for UTF_FIRST_SURROGATE. More... | |
#define | UTF16_TRAIL(supplementary) UTF_SECOND_SURROGATE(supplementary) |
alias for UTF_SECOND_SURROGATE. More... | |
#define | UTF16_IS_SINGLE(uchar) !UTF_IS_SURROGATE(uchar) |
#define | UTF16_IS_LEAD(uchar) UTF_IS_FIRST_SURROGATE(uchar) |
#define | UTF16_IS_TRAIL(uchar) UTF_IS_SECOND_SURROGATE(uchar) |
#define | UTF16_NEED_MULTIPLE_UCHAR(c) ((uint32_t)(c)>0xffff) |
#define | UTF16_CHAR_LENGTH(c) ((uint32_t)(c)<=0xffff ? 1 : 2) |
#define | UTF16_MAX_CHAR_LENGTH 2 |
#define | UTF16_ARRAY_SIZE(size) (size) |
#define | UTF16_GET_CHAR_UNSAFE(s, i, c) |
Get a single code point from an offset that points to any of the code units that belong to that code point. More... | |
#define | UTF16_GET_CHAR_SAFE(s, start, i, length, c, strict) |
#define | UTF16_NEXT_CHAR_UNSAFE(s, i, c) |
Get a single code point from an offset that points to the first of the code units that belong to that code point. More... | |
#define | UTF16_APPEND_CHAR_UNSAFE(s, i, c) |
#define | UTF16_FWD_1_UNSAFE(s, i) |
#define | UTF16_FWD_N_UNSAFE(s, i, n) |
#define | UTF16_SET_CHAR_START_UNSAFE(s, i) |
Set a random-access offset and adjust it so that it points to the beginning of a Unicode character. More... | |
#define | UTF16_NEXT_CHAR_SAFE(s, i, length, c, strict) |
#define | UTF16_APPEND_CHAR_SAFE(s, i, length, c) |
#define | UTF16_FWD_1_SAFE(s, i, length) |
#define | UTF16_FWD_N_SAFE(s, i, length, n) |
#define | UTF16_SET_CHAR_START_SAFE(s, start, i) |
#define | UTF16_PREV_CHAR_UNSAFE(s, i, c) |
Get a single code point from an offset that points behind the last of the code units that belong to that code point. More... | |
#define | UTF16_BACK_1_UNSAFE(s, i) |
#define | UTF16_BACK_N_UNSAFE(s, i, n) |
#define | UTF16_SET_CHAR_LIMIT_UNSAFE(s, i) |
Set a random-access offset and adjust it so that it points after the end of a Unicode character. More... | |
#define | UTF16_PREV_CHAR_SAFE(s, start, i, c, strict) |
#define | UTF16_BACK_1_SAFE(s, start, i) |
#define | UTF16_BACK_N_SAFE(s, start, i, n) |
#define | UTF16_SET_CHAR_LIMIT_SAFE(s, start, i, length) |
This file defines macros to deal with UTF-16 code units and code points. "Safe" macros check for length overruns and illegal sequences, and also for irregular sequences when the strict option is set. "Unsafe" macros are designed for maximum speed. utf16.h is included by utf.h after unicode/umachine.h and some common definitions.
Usage: Follow the ICU coding guidelines for if() statements when using these macros. Compound statements (curly braces {}) must be used for the bodies of if, else, while, etc., and every macro invocation should be terminated with a semicolon.
Definition in file utf16.h.
|
Value: { \ if((uint32_t)(c)<=0xffff) { \ (s)[(i)++]=(uint16_t)(c); \ } else if((uint32_t)(c)<=0x10ffff) { \ if((i)+1<(length)) { \ (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \ (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \ } else { \ (s)[(i)++]=UTF_ERROR_VALUE; \ } \ } else { \ (s)[(i)++]=UTF_ERROR_VALUE; \ } \ } |
|
Value: { \ if((uint32_t)(c)<=0xffff) { \ (s)[(i)++]=(uint16_t)(c); \ } else { \ (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \ (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \ } \ } |
|
|
|
Value: { \ if(UTF_IS_SECOND_SURROGATE((s)[--(i)]) && (i)>(start) && UTF_IS_FIRST_SURROGATE((s)[(i)-1])) { \ --(i); \ } \ } |
|
Value: { \ if(UTF_IS_SECOND_SURROGATE((s)[--(i)])) { \ --(i); \ } \ } |
|
Value: { \ int32_t __N=(n); \ while(__N>0 && (i)>(start)) { \ UTF16_BACK_1_SAFE(s, start, i); \ --__N; \ } \ } |
|
Value: { \ int32_t __N=(n); \ while(__N>0) { \ UTF16_BACK_1_UNSAFE(s, i); \ --__N; \ } \ } |
|
|
|
Value: { \ if(UTF_IS_FIRST_SURROGATE((s)[(i)++]) && (i)<(length) && UTF_IS_SECOND_SURROGATE((s)[i])) { \ ++(i); \ } \ } |
|
Value: { \ if(UTF_IS_FIRST_SURROGATE((s)[(i)++])) { \ ++(i); \ } \ } |
|
Value: { \ int32_t __N=(n); \ while(__N>0 && (i)<(length)) { \ UTF16_FWD_1_SAFE(s, i, length); \ --__N; \ } \ } |
|
Value: { \ int32_t __N=(n); \ while(__N>0) { \ UTF16_FWD_1_UNSAFE(s, i); \ --__N; \ } \ } |
|
Value: { \ (c)=(s)[i]; \ if(UTF_IS_SURROGATE(c)) { \ uint16_t __c2; \ if(UTF_IS_SURROGATE_FIRST(c)) { \ if((i)+1<(length) && UTF_IS_SECOND_SURROGATE(__c2=(s)[(i)+1])) { \ (c)=UTF16_GET_PAIR_VALUE((c), __c2); \ \ } else if(strict) {\ \ (c)=UTF_ERROR_VALUE; \ } \ } else { \ if((i)-1>=(start) && UTF_IS_FIRST_SURROGATE(__c2=(s)[(i)-1])) { \ (c)=UTF16_GET_PAIR_VALUE(__c2, (c)); \ \ } else if(strict) {\ \ (c)=UTF_ERROR_VALUE; \ } \ } \ } else if((strict) && !UTF_IS_UNICODE_CHAR(c)) { \ (c)=UTF_ERROR_VALUE; \ } \ } |
|
Value: { \ (c)=(s)[i]; \ if(UTF_IS_SURROGATE(c)) { \ if(UTF_IS_SURROGATE_FIRST(c)) { \ (c)=UTF16_GET_PAIR_VALUE((c), (s)[(i)+1]); \ } else { \ (c)=UTF16_GET_PAIR_VALUE((s)[(i)-1], (c)); \ } \ } \ } Assume 0<=i<length. This could be used for iteration together with UTF16_CHAR_LENGTH() and UTF_IS_ERROR(), but the use of UTF16_NEXT_CHAR_[UN]SAFE() and UTF16_PREV_CHAR_[UN]SAFE() is more efficient for that. |
|
|
|
|
|
|
|
|
|
alias for UTF_FIRST_SURROGATE.
|
|
|
|
|
|
Value: { \ (c)=(s)[(i)++]; \ if(UTF_IS_FIRST_SURROGATE(c)) { \ uint16_t __c2; \ if((i)<(length) && UTF_IS_SECOND_SURROGATE(__c2=(s)[(i)])) { \ ++(i); \ (c)=UTF16_GET_PAIR_VALUE((c), __c2); \ \ } else if(strict) {\ \ (c)=UTF_ERROR_VALUE; \ } \ } else if((strict) && !UTF_IS_UNICODE_CHAR(c)) { \ \ (c)=UTF_ERROR_VALUE; \ } \ } |
|
Value: { \ (c)=(s)[(i)++]; \ if(UTF_IS_FIRST_SURROGATE(c)) { \ (c)=UTF16_GET_PAIR_VALUE((c), (s)[(i)++]); \ } \ } Assume 0<=i<length. |
|
Value: { \ (c)=(s)[--(i)]; \ if(UTF_IS_SECOND_SURROGATE(c)) { \ uint16_t __c2; \ if((i)>(start) && UTF_IS_FIRST_SURROGATE(__c2=(s)[(i)-1])) { \ --(i); \ (c)=UTF16_GET_PAIR_VALUE(__c2, (c)); \ \ } else if(strict) {\ \ (c)=UTF_ERROR_VALUE; \ } \ } else if((strict) && !UTF_IS_UNICODE_CHAR(c)) { \ \ (c)=UTF_ERROR_VALUE; \ } \ } |
|
Value: { \ (c)=(s)[--(i)]; \ if(UTF_IS_SECOND_SURROGATE(c)) { \ (c)=UTF16_GET_PAIR_VALUE((s)[--(i)], (c)); \ } \ } Assume 0<i<=length, because the offset is decremented before the code unit is read. |
|
Value: { \ if((start)<(i) && (i)<(length) && UTF_IS_FIRST_SURROGATE((s)[(i)-1]) && UTF_IS_SECOND_SURROGATE((s)[i])) { \ ++(i); \ } \ } |
|
Value: { \ if(UTF_IS_FIRST_SURROGATE((s)[(i)-1])) { \ ++(i); \ } \ } The offset that is passed in points behind any code unit of a code point and will point behind the last code unit after the macro invocation. Never decrements the offset. |
|
Value: { \ if(UTF_IS_SECOND_SURROGATE((s)[i]) && (i)>(start) && UTF_IS_FIRST_SURROGATE((s)[(i)-1])) { \ --(i); \ } \ } |
|
Value: { \ if(UTF_IS_SECOND_SURROGATE((s)[i])) { \ --(i); \ } \ } The offset that is passed in points to any code unit of a code point and will point to the first code unit after the macro invocation. Never increments the offset. |
|
alias for UTF_SECOND_SURROGATE.
|
|
Takes a supplementary code point (0x10000..0x10ffff) and computes the first surrogate (0xd800..0xdbff) for UTF-16 encoding.
|
|
|
|
|
|
|
|
Takes a supplementary code point (0x10000..0x10ffff) and computes the second surrogate (0xdc00..0xdfff) for UTF-16 encoding.
|
|
Get the UTF-32 value directly from the surrogate pseudo-characters.
|