Main Page   Class Hierarchy   Alphabetical List   Compound List   File List   Compound Members   File Members   Search  

utf16.h File Reference

C API: UTF-16 macros. More...

Go to the source code of this file.

Defines

#define UTF_IS_FIRST_SURROGATE(uchar)   (((uchar)&0xfffffc00)==0xd800)
#define UTF_IS_SECOND_SURROGATE(uchar)   (((uchar)&0xfffffc00)==0xdc00)
#define UTF_IS_SURROGATE_FIRST(c)   (((c)&0x400)==0)
#define UTF_SURROGATE_OFFSET   ((0xd800<<10UL)+0xdc00-0x10000)
 Get the UTF-32 value directly from the surrogate pseudo-characters. More...

#define UTF16_GET_PAIR_VALUE(first, second)   (((first)<<10UL)+(second)-UTF_SURROGATE_OFFSET)
#define UTF_FIRST_SURROGATE(supplementary)   (UChar)(((supplementary)>>10)+0xd7c0)
 Takes a supplementary code point (0x10000..0x10ffff) and computes the first surrogate (0xd800..0xdbff) for UTF-16 encoding. More...

#define UTF_SECOND_SURROGATE(supplementary)   (UChar)(((supplementary)&0x3ff)|0xdc00)
 Takes a supplementary code point (0x10000..0x10ffff) and computes the second surrogate (0xdc00..0xdfff) for UTF-16 encoding. More...

#define UTF16_LEAD(supplementary)   UTF_FIRST_SURROGATE(supplementary)
 alias for UTF_FIRST_SURROGATE. More...

#define UTF16_TRAIL(supplementary)   UTF_SECOND_SURROGATE(supplementary)
 alias for UTF_SECOND_SURROGATE. More...

#define UTF16_IS_SINGLE(uchar)   !UTF_IS_SURROGATE(uchar)
#define UTF16_IS_LEAD(uchar)   UTF_IS_FIRST_SURROGATE(uchar)
#define UTF16_IS_TRAIL(uchar)   UTF_IS_SECOND_SURROGATE(uchar)
#define UTF16_NEED_MULTIPLE_UCHAR(c)   ((uint32_t)(c)>0xffff)
#define UTF16_CHAR_LENGTH(c)   ((uint32_t)(c)<=0xffff ? 1 : 2)
#define UTF16_MAX_CHAR_LENGTH   2
#define UTF16_ARRAY_SIZE(size)   (size)
#define UTF16_GET_CHAR_UNSAFE(s, i, c)
 Get a single code point from an offset that points to any of the code units that belong to that code point. More...

#define UTF16_GET_CHAR_SAFE(s, start, i, length, c, strict)
#define UTF16_NEXT_CHAR_UNSAFE(s, i, c)
 Get a single code point from an offset that points to the first of the code units that belong to that code point. More...

#define UTF16_APPEND_CHAR_UNSAFE(s, i, c)
#define UTF16_FWD_1_UNSAFE(s, i)
#define UTF16_FWD_N_UNSAFE(s, i, n)
#define UTF16_SET_CHAR_START_UNSAFE(s, i)
 Set a random-access offset and adjust it so that it points to the beginning of a Unicode character. More...

#define UTF16_NEXT_CHAR_SAFE(s, i, length, c, strict)
#define UTF16_APPEND_CHAR_SAFE(s, i, length, c)
#define UTF16_FWD_1_SAFE(s, i, length)
#define UTF16_FWD_N_SAFE(s, i, length, n)
#define UTF16_SET_CHAR_START_SAFE(s, start, i)
#define UTF16_PREV_CHAR_UNSAFE(s, i, c)
 Get a single code point from an offset that points behind the last of the code units that belong to that code point. More...

#define UTF16_BACK_1_UNSAFE(s, i)
#define UTF16_BACK_N_UNSAFE(s, i, n)
#define UTF16_SET_CHAR_LIMIT_UNSAFE(s, i)
 Set a random-access offset and adjust it so that it points after the end of a Unicode character. More...

#define UTF16_PREV_CHAR_SAFE(s, start, i, c, strict)
#define UTF16_BACK_1_SAFE(s, start, i)
#define UTF16_BACK_N_SAFE(s, start, i, n)
#define UTF16_SET_CHAR_LIMIT_SAFE(s, start, i, length)


Detailed Description

C API: UTF-16 macros.

This file defines macros to deal with UTF-16 code units and code points. "Safe" macros check for length overruns and illegal sequences, and also for irregular sequences when the strict option is set. "Unsafe" macros are designed for maximum speed. utf16.h is included by utf.h after unicode/umachine.h and some common definitions.

Usage: The ICU coding guidelines for if() statements should be followed when using these macros. Compound statements (curly braces {}) must be used for the bodies of if, else, while, and similar statements, and every macro invocation should be terminated with a semicolon.

Definition in file utf16.h.


Define Documentation

#define UTF16_APPEND_CHAR_SAFE(s, i, length, c)

Value:

{ \
    if((uint32_t)(c)<=0xffff) { \
        (s)[(i)++]=(uint16_t)(c); \
    } else if((uint32_t)(c)<=0x10ffff) { \
        if((i)+1<(length)) { \
            (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \
            (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \
        } else   { \
            (s)[(i)++]=UTF_ERROR_VALUE; \
        } \
    } else   { \
        (s)[(i)++]=UTF_ERROR_VALUE; \
    } \
}

Definition at line 210 of file utf16.h.

#define UTF16_APPEND_CHAR_UNSAFE(s, i, c)

Value:

{ \
    if((uint32_t)(c)<=0xffff) { \
        (s)[(i)++]=(uint16_t)(c); \
    } else { \
        (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \
        (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \
    } \
}

Definition at line 152 of file utf16.h.

#define UTF16_ARRAY_SIZE(size)   (size)
 

Definition at line 81 of file utf16.h.

#define UTF16_BACK_1_SAFE(s, start, i)

Value:

{ \
    if(UTF_IS_SECOND_SURROGATE((s)[--(i)]) && (i)>(start) && UTF_IS_FIRST_SURROGATE((s)[(i)-1])) { \
        --(i); \
    } \
}

Definition at line 317 of file utf16.h.

#define UTF16_BACK_1_UNSAFE(s, i)

Value:

{ \
    if(UTF_IS_SECOND_SURROGATE((s)[--(i)])) { \
        --(i); \
    } \
}

Definition at line 268 of file utf16.h.

#define UTF16_BACK_N_SAFE(s, start, i, n)

Value:

{ \
    int32_t __N=(n); \
    while(__N>0 && (i)>(start)) { \
        UTF16_BACK_1_SAFE(s, start, i); \
        --__N; \
    } \
}

Definition at line 323 of file utf16.h.

#define UTF16_BACK_N_UNSAFE(s, i, n)

Value:

{ \
    int32_t __N=(n); \
    while(__N>0) { \
        UTF16_BACK_1_UNSAFE(s, i); \
        --__N; \
    } \
}

Definition at line 274 of file utf16.h.

#define UTF16_CHAR_LENGTH(c)   ((uint32_t)(c)<=0xffff ? 1 : 2)
 

Definition at line 77 of file utf16.h.

#define UTF16_FWD_1_SAFE(s, i, length)

Value:

{ \
    if(UTF_IS_FIRST_SURROGATE((s)[(i)++]) && (i)<(length) && UTF_IS_SECOND_SURROGATE((s)[i])) { \
        ++(i); \
    } \
}

Definition at line 225 of file utf16.h.

#define UTF16_FWD_1_UNSAFE(s, i)

Value:

{ \
    if(UTF_IS_FIRST_SURROGATE((s)[(i)++])) { \
        ++(i); \
    } \
}

Definition at line 161 of file utf16.h.

#define UTF16_FWD_N_SAFE(s, i, length, n)

Value:

{ \
    int32_t __N=(n); \
    while(__N>0 && (i)<(length)) { \
        UTF16_FWD_1_SAFE(s, i, length); \
        --__N; \
    } \
}

Definition at line 231 of file utf16.h.

#define UTF16_FWD_N_UNSAFE(s, i, n)

Value:

{ \
    int32_t __N=(n); \
    while(__N>0) { \
        UTF16_FWD_1_UNSAFE(s, i); \
        --__N; \
    } \
}

Definition at line 167 of file utf16.h.

#define UTF16_GET_CHAR_SAFE(s, start, i, length, c, strict)

Value:

{ \
    (c)=(s)[i]; \
    if(UTF_IS_SURROGATE(c)) { \
        uint16_t __c2; \
        if(UTF_IS_SURROGATE_FIRST(c)) { \
            if((i)+1<(length) && UTF_IS_SECOND_SURROGATE(__c2=(s)[(i)+1])) { \
                (c)=UTF16_GET_PAIR_VALUE((c), __c2); \
                  \
            } else if(strict) {\
                  \
                (c)=UTF_ERROR_VALUE; \
            } \
        } else { \
            if((i)-1>=(start) && UTF_IS_FIRST_SURROGATE(__c2=(s)[(i)-1])) { \
                (c)=UTF16_GET_PAIR_VALUE(__c2, (c)); \
                  \
            } else if(strict) {\
                  \
                (c)=UTF_ERROR_VALUE; \
            } \
        } \
    } else if((strict) && !UTF_IS_UNICODE_CHAR(c)) { \
        (c)=UTF_ERROR_VALUE; \
    } \
}

Definition at line 104 of file utf16.h.

#define UTF16_GET_CHAR_UNSAFE(s, i, c)

Value:

{ \
    (c)=(s)[i]; \
    if(UTF_IS_SURROGATE(c)) { \
        if(UTF_IS_SURROGATE_FIRST(c)) { \
            (c)=UTF16_GET_PAIR_VALUE((c), (s)[(i)+1]); \
        } else { \
            (c)=UTF16_GET_PAIR_VALUE((s)[(i)-1], (c)); \
        } \
    } \
}
Get a single code point from an offset that points to any of the code units that belong to that code point.

Assume 0<=i<length.

This could be used for iteration together with UTF16_CHAR_LENGTH() and UTF_IS_ERROR(), but the use of UTF16_NEXT_CHAR_[UN]SAFE() and UTF16_PREV_CHAR_[UN]SAFE() is more efficient for that.

Definition at line 93 of file utf16.h.

#define UTF16_GET_PAIR_VALUE(first, second)   (((first)<<10UL)+(second)-UTF_SURROGATE_OFFSET)
 

Definition at line 46 of file utf16.h.

#define UTF16_IS_LEAD(uchar)   UTF_IS_FIRST_SURROGATE(uchar)
 

Definition at line 72 of file utf16.h.

#define UTF16_IS_SINGLE(uchar)   !UTF_IS_SURROGATE(uchar)
 

Definition at line 71 of file utf16.h.

#define UTF16_IS_TRAIL(uchar)   UTF_IS_SECOND_SURROGATE(uchar)
 

Definition at line 73 of file utf16.h.

#define UTF16_LEAD(supplementary)   UTF_FIRST_SURROGATE(supplementary)
 

alias for UTF_FIRST_SURROGATE.

Definition at line 65 of file utf16.h.

#define UTF16_MAX_CHAR_LENGTH   2
 

Definition at line 78 of file utf16.h.

#define UTF16_NEED_MULTIPLE_UCHAR(c)   ((uint32_t)(c)>0xffff)
 

Definition at line 76 of file utf16.h.

#define UTF16_NEXT_CHAR_SAFE(s, i, length, c, strict)

Value:

{ \
    (c)=(s)[(i)++]; \
    if(UTF_IS_FIRST_SURROGATE(c)) { \
        uint16_t __c2; \
        if((i)<(length) && UTF_IS_SECOND_SURROGATE(__c2=(s)[(i)])) { \
            ++(i); \
            (c)=UTF16_GET_PAIR_VALUE((c), __c2); \
              \
        } else if(strict) {\
              \
            (c)=UTF_ERROR_VALUE; \
        } \
    } else if((strict) && !UTF_IS_UNICODE_CHAR(c)) { \
          \
        (c)=UTF_ERROR_VALUE; \
    } \
}

Definition at line 192 of file utf16.h.

#define UTF16_NEXT_CHAR_UNSAFE(s, i, c)

Value:

{ \
    (c)=(s)[(i)++]; \
    if(UTF_IS_FIRST_SURROGATE(c)) { \
        (c)=UTF16_GET_PAIR_VALUE((c), (s)[(i)++]); \
    } \
}
Get a single code point from an offset that points to the first of the code units that belong to that code point.

Assume 0<=i<length.

Definition at line 145 of file utf16.h.

#define UTF16_PREV_CHAR_SAFE(s, start, i, c, strict)

Value:

{ \
    (c)=(s)[--(i)]; \
    if(UTF_IS_SECOND_SURROGATE(c)) { \
        uint16_t __c2; \
        if((i)>(start) && UTF_IS_FIRST_SURROGATE(__c2=(s)[(i)-1])) { \
            --(i); \
            (c)=UTF16_GET_PAIR_VALUE(__c2, (c)); \
              \
        } else if(strict) {\
              \
            (c)=UTF_ERROR_VALUE; \
        } \
    } else if((strict) && !UTF_IS_UNICODE_CHAR(c)) { \
          \
        (c)=UTF_ERROR_VALUE; \
    } \
}

Definition at line 299 of file utf16.h.

#define UTF16_PREV_CHAR_UNSAFE(s, i, c)

Value:

{ \
    (c)=(s)[--(i)]; \
    if(UTF_IS_SECOND_SURROGATE(c)) { \
        (c)=UTF16_GET_PAIR_VALUE((s)[--(i)], (c)); \
    } \
}
Get a single code point from an offset that points behind the last of the code units that belong to that code point.

Assume 0<i<=length (the offset is decremented before the first code unit is read).

Definition at line 261 of file utf16.h.

#define UTF16_SET_CHAR_LIMIT_SAFE(s, start, i, length)

Value:

{ \
    if((start)<(i) && (i)<(length) && UTF_IS_FIRST_SURROGATE((s)[(i)-1]) && UTF_IS_SECOND_SURROGATE((s)[i])) { \
        ++(i); \
    } \
}

Definition at line 331 of file utf16.h.

#define UTF16_SET_CHAR_LIMIT_UNSAFE(s, i)

Value:

{ \
    if(UTF_IS_FIRST_SURROGATE((s)[(i)-1])) { \
        ++(i); \
    } \
}
Set a random-access offset and adjust it so that it points after the end of a Unicode character.

The offset that is passed in points behind any code unit of a code point and will point behind the last code unit after the macro invocation. Never decrements the offset.

Definition at line 291 of file utf16.h.

#define UTF16_SET_CHAR_START_SAFE(s, start, i)

Value:

{ \
    if(UTF_IS_SECOND_SURROGATE((s)[i]) && (i)>(start) && UTF_IS_FIRST_SURROGATE((s)[(i)-1])) { \
        --(i); \
    } \
}

Definition at line 239 of file utf16.h.

#define UTF16_SET_CHAR_START_UNSAFE(s, i)

Value:

{ \
    if(UTF_IS_SECOND_SURROGATE((s)[i])) { \
        --(i); \
    } \
}
Set a random-access offset and adjust it so that it points to the beginning of a Unicode character.

The offset that is passed in points to any code unit of a code point and will point to the first code unit after the macro invocation. Never increments the offset.

Definition at line 184 of file utf16.h.

#define UTF16_TRAIL(supplementary)   UTF_SECOND_SURROGATE(supplementary)
 

alias for UTF_SECOND_SURROGATE.

Definition at line 68 of file utf16.h.

#define UTF_FIRST_SURROGATE(supplementary)   (UChar)(((supplementary)>>10)+0xd7c0)
 

Takes a supplementary code point (0x10000..0x10ffff) and computes the first surrogate (0xd800..0xdbff) for UTF-16 encoding.

Definition at line 55 of file utf16.h.

#define UTF_IS_FIRST_SURROGATE(uchar)   (((uchar)&0xfffffc00)==0xd800)
 

Definition at line 38 of file utf16.h.

#define UTF_IS_SECOND_SURROGATE(uchar)   (((uchar)&0xfffffc00)==0xdc00)
 

Definition at line 39 of file utf16.h.

#define UTF_IS_SURROGATE_FIRST(c)   (((c)&0x400)==0)
 

Definition at line 41 of file utf16.h.

#define UTF_SECOND_SURROGATE(supplementary)   (UChar)(((supplementary)&0x3ff)|0xdc00)
 

Takes a supplementary code point (0x10000..0x10ffff) and computes the second surrogate (0xdc00..0xdfff) for UTF-16 encoding.

Definition at line 62 of file utf16.h.

#define UTF_SURROGATE_OFFSET   ((0xd800<<10UL)+0xdc00-0x10000)
 

Get the UTF-32 value directly from the surrogate pseudo-characters.

Definition at line 44 of file utf16.h.


Generated on Thu Aug 15 14:13:48 2002 for ICU 2.2 by doxygen 1.2.11.1 written by Dimitri van Heesch, © 1997-2001