Main Page   Class Hierarchy   Alphabetical List   Compound List   File List   Compound Members   File Members  

ucnv.h File Reference

C API: Character conversion. More...

#include "unicode/utypes.h"
#include "unicode/ucnv_err.h"

Go to the source code of this file.

Defines

#define UCNV_MAX_CONVERTER_NAME_LENGTH   60
#define UCNV_MAX_FULL_FILE_NAME_LENGTH   (600+UCNV_MAX_CONVERTER_NAME_LENGTH)
#define UCNV_SI   0x0F
#define UCNV_SO   0x0E
#define UCNV_OPTION_SEP_CHAR   ','
 Character that separates converter names from options and options from each other. More...

#define UCNV_OPTION_SEP_STRING   ","
 String version of UCNV_OPTION_SEP_CHAR. More...

#define UCNV_VALUE_SEP_CHAR   '='
 Character that separates a converter option from its value. More...

#define UCNV_VALUE_SEP_STRING   "="
 String version of UCNV_VALUE_SEP_CHAR. More...

#define UCNV_LOCALE_OPTION_STRING   ",locale="
 Converter option for specifying a locale. More...

#define U_CNV_SAFECLONE_BUFFERSIZE   512

Typedefs

typedef struct UConverter UConverter
typedef void (* UConverterToUCallback )( void* context, UConverterToUnicodeArgs *args, const char *codePoints, int32_t length, UConverterCallbackReason reason, UErrorCode *)
 Function pointer for error callback in the codepage to unicode direction. More...

typedef void (* UConverterFromUCallback )( void* context, UConverterFromUnicodeArgs *args, const UChar* codeUnits, int32_t length, UChar32 codePoint, UConverterCallbackReason reason, UErrorCode *)
 Function pointer for error callback in the unicode to codepage direction. More...


Enumerations

enum  UConverterType {
  UCNV_UNSUPPORTED_CONVERTER = -1, UCNV_SBCS = 0, UCNV_DBCS = 1, UCNV_MBCS = 2,
  UCNV_LATIN_1 = 3, UCNV_UTF8 = 4, UCNV_UTF16_BigEndian = 5, UCNV_UTF16_LittleEndian = 6,
  UCNV_UTF32_BigEndian = 7, UCNV_UTF32_LittleEndian = 8, UCNV_EBCDIC_STATEFUL = 9, UCNV_ISO_2022 = 10,
  UCNV_LMBCS_1 = 11, UCNV_LMBCS_2, UCNV_LMBCS_3, UCNV_LMBCS_4,
  UCNV_LMBCS_5, UCNV_LMBCS_6, UCNV_LMBCS_8, UCNV_LMBCS_11,
  UCNV_LMBCS_16, UCNV_LMBCS_17, UCNV_LMBCS_18, UCNV_LMBCS_19,
  UCNV_LMBCS_LAST = UCNV_LMBCS_19, UCNV_HZ, UCNV_SCSU, UCNV_US_ASCII,
  UCNV_UTF7, UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES
}
 Enum for specifying basic types of converters. More...

enum  UConverterPlatform { UCNV_UNKNOWN = -1, UCNV_IBM = 0 }
 Enum for specifying which platform a converter ID refers to. More...


Functions

U_CAPI int U_EXPORT2 ucnv_compareNames (const char *name1, const char *name2)
 Do a fuzzy compare of two converter/alias names. More...

U_CAPI UConverter* U_EXPORT2 ucnv_open (const char *converterName, UErrorCode * err)
 Creates a UConverter object with the names specified as a C string. More...

U_CAPI UConverter* U_EXPORT2 ucnv_openU (const UChar * name, UErrorCode * err)
 Creates a Unicode converter with the names specified as unicode string. More...

U_CAPI UConverter* U_EXPORT2 ucnv_openCCSID (int32_t codepage, UConverterPlatform platform, UErrorCode * err)
 Creates a UConverter object from a CCSID number and platform pair. More...

U_CAPI UConverter* ucnv_safeClone (const UConverter *cnv, void *stackBuffer, int32_t *pBufferSize, UErrorCode *status)
 Thread safe cloning operation. More...

U_CAPI void U_EXPORT2 ucnv_close (UConverter * converter)
 Deletes the unicode converter and releases resources associated with just this instance. More...

U_CAPI void U_EXPORT2 ucnv_getSubstChars (const UConverter * converter, char *subChars, int8_t * len, UErrorCode * err)
 Fills in the output parameter, subChars, with the substitution characters as multiple bytes. More...

U_CAPI void U_EXPORT2 ucnv_setSubstChars (UConverter * converter, const char *subChars, int8_t len, UErrorCode * err)
 Sets the substitution chars when converting from unicode to a codepage. More...

U_CAPI void U_EXPORT2 ucnv_getInvalidChars (const UConverter * converter, char *errBytes, int8_t * len, UErrorCode * err)
 Fills in the output parameter, errBytes, with the error characters from the last failing conversion. More...

U_CAPI void U_EXPORT2 ucnv_getInvalidUChars (const UConverter * converter, UChar *errUChars, int8_t * len, UErrorCode * err)
 Fills in the output parameter, errUChars, with the error characters from the last failing conversion. More...

U_CAPI void U_EXPORT2 ucnv_reset (UConverter * converter)
 Resets the state of a converter to the default state. More...

U_CAPI void ucnv_resetToUnicode (UConverter *converter)
 Resets the to-Unicode part of a converter state to the default state. More...

U_CAPI void ucnv_resetFromUnicode (UConverter *converter)
 Resets the from-Unicode part of a converter state to the default state. More...

U_CAPI int8_t U_EXPORT2 ucnv_getMaxCharSize (const UConverter * converter)
 Returns the maximum length of bytes used by a character. More...

U_CAPI int8_t U_EXPORT2 ucnv_getMinCharSize (const UConverter * converter)
 Returns the minimum byte length for characters in this codepage. More...

U_CAPI int32_t U_EXPORT2 ucnv_getDisplayName (const UConverter * converter, const char *displayLocale, UChar * displayName, int32_t displayNameCapacity, UErrorCode * err)
 Returns the display name of the converter passed in based on the Locale passed in. More...

U_CAPI const char* U_EXPORT2 ucnv_getName (const UConverter * converter, UErrorCode * err)
 Gets the internal, canonical name of the converter (zero-terminated). More...

U_CAPI int32_t U_EXPORT2 ucnv_getCCSID (const UConverter * converter, UErrorCode * err)
 Gets a codepage number associated with the converter. More...

U_CAPI UConverterPlatform
U_EXPORT2 
ucnv_getPlatform (const UConverter * converter, UErrorCode * err)
 Gets a codepage platform associated with the converter. More...

U_CAPI UConverterType U_EXPORT2 ucnv_getType (const UConverter * converter)
 Gets the type of the converter e.g. More...

U_CAPI void U_EXPORT2 ucnv_getStarters (const UConverter* converter, UBool starters[256], UErrorCode* err)
 Gets the "starter" (lead) bytes for converters of type MBCS. More...

U_CAPI void U_EXPORT2 ucnv_getToUCallBack (const UConverter * converter, UConverterToUCallback *action, void **context)
 Gets the current callback function used by the converter when an illegal or invalid codepage sequence is found. More...

U_CAPI void U_EXPORT2 ucnv_getFromUCallBack (const UConverter * converter, UConverterFromUCallback *action, void **context)
 Gets the current callback function used by the converter when an illegal or invalid Unicode sequence is found. More...

U_CAPI void U_EXPORT2 ucnv_setToUCallBack (UConverter * converter, UConverterToUCallback newAction, void* newContext, UConverterToUCallback *oldAction, void** oldContext, UErrorCode * err)
 Changes the callback function used by the converter when an illegal or invalid sequence is found. More...

U_CAPI void U_EXPORT2 ucnv_setFromUCallBack (UConverter * converter, UConverterFromUCallback newAction, void *newContext, UConverterFromUCallback *oldAction, void **oldContext, UErrorCode * err)
 Changes the current callback function used by the converter when an illegal or invalid sequence is found. More...

U_CAPI void U_EXPORT2 ucnv_fromUnicode (UConverter * converter, char **target, const char *targetLimit, const UChar ** source, const UChar * sourceLimit, int32_t* offsets, UBool flush, UErrorCode * err)
 Converts an array of unicode characters to an array of codepage characters. More...

U_CAPI void U_EXPORT2 ucnv_toUnicode (UConverter * converter, UChar ** target, const UChar * targetLimit, const char **source, const char *sourceLimit, int32_t* offsets, UBool flush, UErrorCode * err)
 Converts a buffer of codepage bytes into an array of unicode UChars characters. More...

U_CAPI int32_t U_EXPORT2 ucnv_fromUChars (const UConverter * converter, char *target, int32_t targetCapacity, const UChar * source, int32_t sourceLength, UErrorCode * err)
 Converts the source Unicode string into the target codepage with the specified Unicode converter. More...

U_CAPI int32_t U_EXPORT2 ucnv_toUChars (const UConverter * converter, UChar * target, int32_t targetCapacity, const char *source, int32_t sourceSize, UErrorCode * err)
 Converts the source string in codepage encoding into the target string in Unicode encoding. More...

U_CAPI UChar32 U_EXPORT2 ucnv_getNextUChar (UConverter * converter, const char **source, const char * sourceLimit, UErrorCode * err)
U_CAPI int32_t U_EXPORT2 ucnv_convert (const char *toConverterName, const char *fromConverterName, char *target, int32_t targetCapacity, const char *source, int32_t sourceLength, UErrorCode * err)
U_CAPI int32_t U_EXPORT2 ucnv_flushCache (void)
 Frees up memory occupied by unused, cached converter shared data. More...

U_CAPI int32_t U_EXPORT2 ucnv_countAvailable (void)
 returns the number of available converters, as per the alias file. More...

U_CAPI const char* U_EXPORT2 ucnv_getAvailableName (int32_t n)
 Gets the name of the specified converter from a list of all converters contained in the alias file. More...

U_CAPI uint16_t ucnv_countAliases (const char *alias, UErrorCode *pErrorCode)
 Gives the number of aliases for a given converter or alias name. More...

U_CAPI const char* ucnv_getAlias (const char *alias, uint16_t n, UErrorCode *pErrorCode)
 Gives the name of the alias at given index of alias list. More...

U_CAPI void ucnv_getAliases (const char *alias, const char **aliases, UErrorCode *pErrorCode)
 Fill-up the list of alias names for the given alias. More...

U_CAPI uint16_t U_EXPORT2 ucnv_countStandards (void)
 Gives the number of standards associated to converter names. More...

U_CAPI const char* U_EXPORT2 ucnv_getStandard (uint16_t n, UErrorCode *pErrorCode)
 Gives the name of the standard at given index of standard list. More...

U_CAPI const char* U_EXPORT2 ucnv_getStandardName (const char *name, const char *standard, UErrorCode *pErrorCode)
 Returns a standard name for a given converter name. More...

U_CAPI const char* U_EXPORT2 ucnv_getDefaultName (void)
 returns the current default converter name. More...

U_CAPI void U_EXPORT2 ucnv_setDefaultName (const char *name)
 sets the current default converter name. More...

U_CAPI void U_EXPORT2 ucnv_fixFileSeparator (const UConverter *cnv, UChar* source, int32_t sourceLen)
 Fixes the backslash character mismapping. More...

U_CAPI UBool U_EXPORT2 ucnv_isAmbiguous (const UConverter *cnv)
 Determines if the converter contains ambiguous mappings of the same character or not. More...

U_CAPI void U_EXPORT2 ucnv_setFallback (UConverter *cnv, UBool usesFallback)
 Sets the converter to use fallback mapping or not. More...

U_CAPI UBool U_EXPORT2 ucnv_usesFallback (const UConverter *cnv)
 Determines if the converter uses fallback mappings or not. More...


Detailed Description

C API: Character conversion.

Character Conversion C API

Definition in file ucnv.h.


Define Documentation

#define UCNV_LOCALE_OPTION_STRING   ",locale="
 

Converter option for specifying a locale.

See also:
ucnv_open

Definition at line 173 of file ucnv.h.

#define UCNV_MAX_CONVERTER_NAME_LENGTH   60
 

Definition at line 40 of file ucnv.h.

#define UCNV_MAX_FULL_FILE_NAME_LENGTH   (600+UCNV_MAX_CONVERTER_NAME_LENGTH)
 

Definition at line 41 of file ucnv.h.

#define UCNV_OPTION_SEP_CHAR   ','
 

Character that separates converter names from options and options from each other.

See also:
ucnv_open

Definition at line 149 of file ucnv.h.

#define UCNV_OPTION_SEP_STRING   ","
 

String version of UCNV_OPTION_SEP_CHAR.

See also:
ucnv_open

Definition at line 155 of file ucnv.h.

#define UCNV_SI   0x0F
 

Definition at line 43 of file ucnv.h.

#define UCNV_SO   0x0E
 

Definition at line 44 of file ucnv.h.

#define UCNV_VALUE_SEP_CHAR   '='
 

Character that separates a converter option from its value.

See also:
ucnv_open

Definition at line 161 of file ucnv.h.

#define UCNV_VALUE_SEP_STRING   "="
 

String version of UCNV_VALUE_SEP_CHAR.

See also:
ucnv_open

Definition at line 167 of file ucnv.h.

#define U_CNV_SAFECLONE_BUFFERSIZE   512
 

Definition at line 338 of file ucnv.h.


Typedef Documentation

typedef struct UConverter UConverter
 

Definition at line 32 of file ucnv.h.

typedef void (* UConverterFromUCallback)( void* context, UConverterFromUnicodeArgs *args, const UChar* codeUnits, int32_t length, UChar32 codePoint, UConverterCallbackReason reason, UErrorCode *)
 

Function pointer for error callback in the unicode to codepage direction.

Called when an error has occurred in conversion from unicode, or on open/close of the callback (see reason).

Parameters:
context   Pointer to the callback's private data
args   Information about the conversion in progress
codeUnits   Points to 'length' UChars of the concerned Unicode sequence
length   Number of UChars in the concerned Unicode sequence pointed to by codeUnits
codePoint   Single UChar32 (UTF-32) containing the concerned Unicode codepoint.
reason   Defines the reason the callback was invoked
See also:
ucnv_setFromUCallBack

Definition at line 134 of file ucnv.h.

typedef void (* UConverterToUCallback)( void* context, UConverterToUnicodeArgs *args, const char *codePoints, int32_t length, UConverterCallbackReason reason, UErrorCode *)
 

Function pointer for error callback in the codepage to unicode direction.

Called when an error has occurred in conversion to unicode, or on open/close of the callback (see reason).

Parameters:
context   Pointer to the callback's private data
args   Information about the conversion in progress
codePoints   Points to 'length' bytes of the concerned codepage sequence
length   Size (in bytes) of the concerned codepage sequence
reason   Defines the reason the callback was invoked
See also:
ucnv_setToUCallBack , UConverterToUnicodeArgs

Definition at line 114 of file ucnv.h.


Enumeration Type Documentation

enum UConverterPlatform
 

Enum for specifying which platform a converter ID refers to.

The use of platform/CCSID is not recommended. See ucnv_openCCSID().

See also:
ucnv_getPlatform , ucnv_openCCSID , ucnv_getCCSID
Enumeration values:
UCNV_UNKNOWN  
UCNV_IBM  

Definition at line 97 of file ucnv.h.

enum UConverterType
 

Enum for specifying basic types of converters.

See also:
ucnv_getType
Enumeration values:
UCNV_UNSUPPORTED_CONVERTER  
UCNV_SBCS  
UCNV_DBCS  
UCNV_MBCS  
UCNV_LATIN_1  
UCNV_UTF8  
UCNV_UTF16_BigEndian  
UCNV_UTF16_LittleEndian  
UCNV_UTF32_BigEndian  
UCNV_UTF32_LittleEndian  
UCNV_EBCDIC_STATEFUL  
UCNV_ISO_2022  
UCNV_LMBCS_1  
UCNV_LMBCS_2  
UCNV_LMBCS_3  
UCNV_LMBCS_4  
UCNV_LMBCS_5  
UCNV_LMBCS_6  
UCNV_LMBCS_8  
UCNV_LMBCS_11  
UCNV_LMBCS_16  
UCNV_LMBCS_17  
UCNV_LMBCS_18  
UCNV_LMBCS_19  
UCNV_LMBCS_LAST  
UCNV_HZ  
UCNV_SCSU  
UCNV_US_ASCII  
UCNV_UTF7  
UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES  

Definition at line 51 of file ucnv.h.


Function Documentation

U_CAPI void U_EXPORT2 ucnv_close ( UConverter * converter )
 

Deletes the unicode converter and releases resources associated with just this instance.

Does not free up shared converter tables.

Parameters:
converter   the converter object to be deleted
See also:
ucnv_open , ucnv_openU , ucnv_openCCSID
Stable:

U_CAPI int U_EXPORT2 ucnv_compareNames ( const char * name1,
const char * name2 )
 

Do a fuzzy compare of two converter/alias names.

The comparison is case-insensitive. It also ignores the characters '-', '_', and ' ' (dash, underscore, and space). Thus the strings "UTF-8", "utf_8", and "Utf 8" are exactly equivalent.

Parameters:
name1   a converter name or alias, zero-terminated
name2   a converter name or alias, zero-terminated
Returns:
0 if the names match, or a negative value if the name1 lexically precedes name2, or a positive value if the name1 lexically follows name2.
Draft:

U_CAPI int32_t U_EXPORT2 ucnv_convert ( const char * toConverterName,
const char * fromConverterName,
char * target,
int32_t targetCapacity,
const char * source,
int32_t sourceLength,
UErrorCode * err )
 

U_CAPI uint16_t ucnv_countAliases ( const char * alias,
UErrorCode * pErrorCode )
 

Gives the number of aliases for a given converter or alias name.

Note that additional aliases are recognized by ucnv_open(). This method only enumerates the listed entries in the alias file.

Parameters:
alias   alias name
pErrorCode   error status
Returns:
number of names on alias list for given alias
Stable:

U_CAPI int32_t U_EXPORT2 ucnv_countAvailable ( void )
 

returns the number of available converters, as per the alias file.

Returns:
the number of available converters
See also:
ucnv_getAvailableName
Stable:

U_CAPI uint16_t U_EXPORT2 ucnv_countStandards ( void )
 

Gives the number of standards associated to converter names.

Returns:
number of standards
Stable:

U_CAPI void U_EXPORT2 ucnv_fixFileSeparator ( const UConverter * cnv,
UChar * source,
int32_t sourceLen )
 

Fixes the backslash character mismapping.

For example, in SJIS, the backslash character in the ASCII portion is also used to represent the yen currency sign. When mapping from Unicode character 0x005C, it's unclear whether to map the character back to yen or backslash in SJIS. This function will take the input buffer and replace all the yen sign characters with backslash. This is necessary when the user tries to open a file with the input buffer on Windows. This function will test the converter to see whether such mapping is required.

Parameters:
cnv   The converter representing the target codepage.
source   the input buffer to be fixed
sourceLen   the length of the input buffer
See also:
ucnv_isAmbiguous
Draft:

U_CAPI int32_t U_EXPORT2 ucnv_flushCache ( void )
 

Frees up memory occupied by unused, cached converter shared data.

Returns:
the number of cached converters successfully deleted
See also:
ucnv_close
Stable:
System:
SYSTEM API

U_CAPI int32_t U_EXPORT2 ucnv_fromUChars ( const UConverter * converter,
char * target,
int32_t targetCapacity,
const UChar * source,
int32_t sourceLength,
UErrorCode * err )
 

Converts the source Unicode string into the target codepage with the specified Unicode converter.

If any problems during conversion are encountered, the currently installed fromUnicode callback will be used. This function is a more convenient but less efficient version of ucnv_fromUnicode(). targetCapacity may be 0 if you only want to know the exact number of target bytes required. The maximum target buffer size required (barring callbacks) will be sourceLength*ucnv_getMaxCharSize().

Parameters:
converter   the Unicode converter
target   the target buffer (not zero-terminated because the structure of codepages varies. There is not a reliable way to produce a terminator.)
targetCapacity   the number of bytes available in the target buffer
source   the source Unicode string
sourceLength   the length of the source string. If -1 is passed in as the value, the source buffer is a zero terminated string and whole source buffer will be converted.
err   the error status code. U_ILLEGAL_ARGUMENT_ERROR is returned if the converter is NULL or the source or target string is empty. U_BUFFER_OVERFLOW_ERROR will be set if the target is full and there is still input left in the source.
Returns:
number of bytes needed in target, regardless of targetCapacity
See also:
ucnv_fromUnicode , ucnv_convert
Draft:
backslash versus Yen sign in shift-JIS

U_CAPI void U_EXPORT2 ucnv_fromUnicode ( UConverter * converter,
char ** target,
const char * targetLimit,
const UChar ** source,
const UChar * sourceLimit,
int32_t * offsets,
UBool flush,
UErrorCode * err )
 

Converts an array of unicode characters to an array of codepage characters.

This function is optimized for converting a continuous stream of data in buffer-sized chunks, where the entire source and target does not fit in available buffers.

The source pointer is an in/out parameter. It starts out pointing where the conversion is to begin, and ends up pointing after the last UChar consumed.

Target similarly starts out pointing at the first available byte in the output buffer, and ends up pointing after the last byte written to the output.

The converter always attempts to consume the entire source buffer, unless (1.) the target buffer is full, or (2.) a failing error is returned from the current callback function. When a successful error status has been returned, it means that all of the source buffer has been consumed. At that point, the caller should reset the source and sourceLimit pointers to point to the next chunk.

This is a stateful conversion. Additionally, even when all source data has been consumed, some data may be in the converters' internal state. Call this function repeatedly, updating the target pointers with the next empty chunk of target in case of a U_BUFFER_OVERFLOW_ERROR, and updating the source pointers with the next chunk of source when a successful error status is returned, until there are no more chunks of source data.

Parameters:
converter   the Unicode converter
target   I/O parameter. Input : Points to the beginning of the buffer to copy codepage characters to. Output : points to after the last codepage character copied to target.
targetLimit   the pointer just after last of the target buffer
source   I/O parameter, pointer to pointer to the source Unicode character buffer.
sourceLimit   the pointer just after the last of the source buffer
offsets   if NULL is passed, nothing will happen to it, otherwise it needs to have the same number of allocated cells as target. Will fill in offsets from target to source pointer, e.g., if offsets[3] is equal to 6, it means that target[3] was a result of transcoding source[6]. For output data carried across calls, and other data without a specific source character (such as from escape sequences or callbacks), -1 will be placed for offsets.
flush   set to TRUE if the current source buffer is the last available chunk of the source, FALSE otherwise. Note that if a failing status is returned, this function may have to be called multiple times with flush set to TRUE until the source buffer is consumed.
err   the error status. U_ILLEGAL_ARGUMENT_ERROR will be set if the converter is NULL. U_BUFFER_OVERFLOW_ERROR will be set if the target is full and there is still data to be written to the target.
See also:
ucnv_fromUChars , ucnv_convert , ucnv_getMinCharSize , ucnv_setToUCallBack
Draft:
backslash versus Yen sign in shift-JIS

U_CAPI const char * ucnv_getAlias ( const char * alias,
uint16_t n,
UErrorCode * pErrorCode )
 

Gives the name of the alias at given index of alias list.

Note that additional aliases are recognized by ucnv_open(). This method only enumerates the listed entries in the alias file.

Parameters:
alias   alias name
n   index in alias list
pErrorCode   result of operation
Returns:
returns the name of the alias at given index
See also:
ucnv_countAliases
Stable:

U_CAPI void ucnv_getAliases ( const char * alias,
const char ** aliases,
UErrorCode * pErrorCode )
 

Fill-up the list of alias names for the given alias.

Note that additional aliases are recognized by ucnv_open(). This method only enumerates the listed entries in the alias file.

Parameters:
alias   alias name
aliases   fill-in list, aliases is a pointer to an array of ucnv_countAliases() string-pointers (const char *) that will be filled in. The strings themselves are owned by the library.
pErrorCode   result of operation
Stable:

U_CAPI const char *U_EXPORT2 ucnv_getAvailableName ( int32_t n )
 

Gets the name of the specified converter from a list of all converters contained in the alias file.

Parameters:
n   the index to a converter available on the system (in the range [0..ucnv_countAvailable()])
Returns:
a pointer to a string (library owned), or NULL if the index is out of bounds.
See also:
ucnv_countAvailable
Stable:

U_CAPI int32_t U_EXPORT2 ucnv_getCCSID ( const UConverter * converter,
UErrorCode * err )
 

Gets a codepage number associated with the converter.

This is not guaranteed to be the one used to create the converter. Some converters do not represent platform registered codepages and return zero for the codepage number. The error code fill-in parameter indicates if the codepage number is available. Does not check if the converter is NULL or if converter's data table is NULL.

Important: The use of CCSIDs is not recommended because it is limited to only two platforms in principle and only one (UCNV_IBM) in the current ICU converter API. Also, CCSIDs are insufficient to identify IBM Unicode conversion tables precisely. For more details see ucnv_openCCSID().

Parameters:
converter   the Unicode converter
err   the error status code.
Returns:
If any error occurs, -1 will be returned; otherwise, the codepage number will be returned.
See also:
ucnv_openCCSID , ucnv_getPlatform
Stable:

U_CAPI const char *U_EXPORT2 ucnv_getDefaultName ( void )
 

returns the current default converter name.

Returns:
returns the current default converter name; if a default converter name cannot be determined, then NULL is returned. Storage owned by the library
See also:
ucnv_setDefaultName
Stable:

U_CAPI int32_t U_EXPORT2 ucnv_getDisplayName ( const UConverter * converter,
const char * displayLocale,
UChar * displayName,
int32_t displayNameCapacity,
UErrorCode * err )
 

Returns the display name of the converter passed in based on the Locale passed in.

If the locale contains no display name, the internal ASCII name will be filled in.

Parameters:
converter   the Unicode converter.
displayLocale   the specific Locale we want to localise for
displayName   user provided buffer to be filled in
displayNameCapacity   size of displayName Buffer
err   error status code
Returns:
displayNameLength number of UChar needed in displayName
See also:
ucnv_getName
Stable:

U_CAPI void U_EXPORT2 ucnv_getFromUCallBack ( const UConverter * converter,
UConverterFromUCallback * action,
void ** context )
 

Gets the current callback function used by the converter when an illegal or invalid Unicode sequence is found.

Context pointers are always owned by the caller.

Parameters:
converter   the unicode converter
action   fillin: returns the callback function pointer
context   fillin: returns the callback's private void* context
See also:
ucnv_setFromUCallBack
Stable:

U_CAPI void U_EXPORT2 ucnv_getInvalidChars ( const UConverter * converter,
char * errBytes,
int8_t * len,
UErrorCode * err )
 

Fills in the output parameter, errBytes, with the error characters from the last failing conversion.

Parameters:
converter   the Unicode converter
errBytes   the codepage bytes which were in error
len   on input the capacity of errBytes, on output the number of bytes which were copied to it
err   the error status code. If the substitution character array is too small, an U_INDEX_OUTOFBOUNDS_ERROR will be returned.
Stable:

U_CAPI void U_EXPORT2 ucnv_getInvalidUChars ( const UConverter * converter,
UChar * errUChars,
int8_t * len,
UErrorCode * err )
 

Fills in the output parameter, errUChars, with the error characters from the last failing conversion.

Parameters:
converter   the Unicode converter
errUChars   the UChars which were in error
len   on input the capacity of errUChars, on output the number of UChars which were copied to it
err   the error status code. If the substitution character array is too small, an U_INDEX_OUTOFBOUNDS_ERROR will be returned.
Stable:

U_CAPI int8_t U_EXPORT2 ucnv_getMaxCharSize ( const UConverter * converter )
 

Returns the maximum length of bytes used by a character.

This varies between 1 and 4

Parameters:
converter   the Unicode converter
Returns:
the maximum number of bytes allowed by this particular converter
See also:
ucnv_getMinCharSize
Stable:

U_CAPI int8_t U_EXPORT2 ucnv_getMinCharSize ( const UConverter * converter )
 

Returns the minimum byte length for characters in this codepage.

This is either 1 or 2 for all supported codepages.

Parameters:
converter   the Unicode converter
Returns:
the minimum number of bytes allowed by this particular converter
See also:
ucnv_getMaxCharSize
Stable:

U_CAPI const char *U_EXPORT2 ucnv_getName ( const UConverter * converter,
UErrorCode * err )
 

Gets the internal, canonical name of the converter (zero-terminated).

The lifetime of the returned string will be that of the converter passed to this function.

Parameters:
converter   the Unicode converter
err   UErrorCode status
Returns:
the internal name of the converter
See also:
ucnv_getDisplayName
Stable:

U_CAPI UChar32 U_EXPORT2 ucnv_getNextUChar ( UConverter * converter,
const char ** source,
const char * sourceLimit,
UErrorCode * err )
 

U_CAPI UConverterPlatform U_EXPORT2 ucnv_getPlatform ( const UConverter * converter,
UErrorCode * err )
 

Gets a codepage platform associated with the converter.

Currently, only UCNV_IBM will be returned. Does not test if the converter is NULL or if converter's data table is NULL.

Parameters:
converter   the Unicode converter
err   the error status code.
Returns:
The codepage platform
Stable:

U_CAPI const char *U_EXPORT2 ucnv_getStandard ( uint16_t n,
UErrorCode * pErrorCode )
 

Gives the name of the standard at given index of standard list.

Parameters:
n   index in standard list
pErrorCode   result of operation
Returns:
returns the name of the standard at given index. Owned by the library.
Stable:

U_CAPI const char *U_EXPORT2 ucnv_getStandardName ( const char * name,
const char * standard,
UErrorCode * pErrorCode )
 

Returns a standard name for a given converter name.

Parameters:
name   original converter name
standard   name of the standard governing the names; MIME and IANA are such standards
Returns:
returns the standard converter name; if a standard converter name cannot be determined, then NULL is returned. Owned by the library.
Stable:

U_CAPI void U_EXPORT2 ucnv_getStarters ( const UConverter * converter,
UBool starters[256],
UErrorCode * err )
 

Gets the "starter" (lead) bytes for converters of type MBCS.

Will fill in an U_ILLEGAL_ARGUMENT_ERROR if converter passed in is not MBCS. Fills in an array of type UBool, with the value of the byte as offset to the array. For example, if (starters[0x20] == TRUE) at return, it means that the byte 0x20 is a starter byte in this converter. Context pointers are always owned by the caller.

Parameters:
converter   a valid, opened converter of type MBCS
starters   an array of size 256 to be filled in
err   error status, U_ILLEGAL_ARGUMENT_ERROR if the converter is not a type which can return starters.
See also:
ucnv_getType
Stable:

U_CAPI void U_EXPORT2 ucnv_getSubstChars ( const UConverter * converter,
char * subChars,
int8_t * len,
UErrorCode * err )
 

Fills in the output parameter, subChars, with the substitution characters as multiple bytes.

Parameters:
converter   the Unicode converter
subChars   the substitution characters
len   on input the capacity of subChars, on output the number of bytes copied to it
err   the outgoing error status code. If the substitution character array is too small, an U_INDEX_OUTOFBOUNDS_ERROR will be returned.
See also:
ucnv_setSubstChars
Stable:

U_CAPI void U_EXPORT2 ucnv_getToUCallBack ( const UConverter * converter,
UConverterToUCallback * action,
void ** context )
 

Gets the current callback function used by the converter when an illegal or invalid codepage sequence is found.

Context pointers are always owned by the caller.

Parameters:
converter   the unicode converter
action   fillin: returns the callback function pointer
context   fillin: returns the callback's private void* context
See also:
ucnv_setToUCallBack
Stable:

U_CAPI UConverterType U_EXPORT2 ucnv_getType ( const UConverter * converter )
 

Gets the type of the converter e.g.

SBCS, MBCS, DBCS, UTF8, UTF16_BE, UTF16_LE, ISO_2022, EBCDIC_STATEFUL, LATIN_1

Parameters:
converter   a valid, opened converter
Returns:
the type of the converter
Stable:

U_CAPI UBool U_EXPORT2 ucnv_isAmbiguous ( const UConverter * cnv )
 

Determines if the converter contains ambiguous mappings of the same character or not.

Returns:
TRUE if the converter contains ambiguous mapping of the same character, FALSE otherwise.
Draft:

U_CAPI UConverter *U_EXPORT2 ucnv_open ( const char * converterName,
UErrorCode * err )
 

Creates a UConverter object with the names specified as a C string.

The actual name will be resolved with the alias file using a case-insensitive string comparison that ignores the delimiters '-', '_', and ' ' (dash, underscore, and space). E.g., the names "UTF8", "utf-8", and "Utf 8" are all equivalent. If NULL is passed for the converter name, it will create one with the ucnv_getDefaultName() return value.

A converter name for ICU 1.5 and above may contain options like a locale specification to control the specific behavior of the newly instantiated converter. The meaning of the options depends on the particular converter. If an option is not defined for or recognized by a given converter, then it is ignored.

Options are appended to the converter name string, with a UCNV_OPTION_SEP_CHAR between the name and the first option and also between adjacent options.

Parameters:
converterName   : name of the uconv table, may have options appended
err   outgoing error status U_MEMORY_ALLOCATION_ERROR, TABLE_NOT_FOUND
Returns:
the created Unicode converter object, or NULL if an error occurred
See also:
ucnv_openU , ucnv_openCCSID , ucnv_close
Stable:

U_CAPI UConverter *U_EXPORT2 ucnv_openCCSID ( int32_t codepage,
UConverterPlatform platform,
UErrorCode * err )
 

Creates a UConverter object from a CCSID number and platform pair.

Note that the usefulness of this function is limited to platforms with numeric encoding IDs. Only IBM and Microsoft platforms use numeric (16-bit) identifiers for encodings.

In addition, IBM CCSIDs and Unicode conversion tables are not 1:1 related. For many IBM CCSIDs there are multiple (up to six) Unicode conversion tables, and for some Unicode conversion tables there are multiple CCSIDs. Some "alternate" Unicode conversion tables are provided by the IBM CDRA conversion table registry. The most prominent example of a systematic modification of conversion tables that is not provided in the form of conversion table files in the repository is that S/390 Unix System Services swaps the codes for Line Feed and New Line in all EBCDIC codepages, which requires such a swap in the Unicode conversion tables as well.

Only IBM default conversion tables are accessible with ucnv_openCCSID(). ucnv_getCCSID() will return the same CCSID for all conversion tables that are associated with that CCSID.

Currently, the only "platform" supported in the ICU converter API is UCNV_IBM.

In summary, the use of CCSIDs and the associated API functions is not recommended.

In order to open a converter with the default IBM CDRA Unicode conversion table, you can use this function or use the prefix "ibm-":

     char name[20];
     sprintf(name, "ibm-%hu", ccsid);
     cnv=ucnv_open(name, &errorCode);

In order to open a converter with the IBM S/390 Unix System Services variant of a Unicode/EBCDIC conversion table, you can use the prefix "ibm-" together with the suffix "-s390":

     char name[20];
     sprintf(name, "ibm-%hu-s390", ccsid);
     cnv=ucnv_open(name, &errorCode);

In order to open a converter from a Microsoft codepage number, use the prefix "cp":

     char name[20];
     sprintf(name, "cp%hu", codepageID);
     cnv=ucnv_open(name, &errorCode);
Parameters:
codepage   codepage number to create
platform   the platform in which the codepage number exists
err   error status U_MEMORY_ALLOCATION_ERROR, TABLE_NOT_FOUND
Returns:
the created Unicode converter object, or NULL if an error occurred.
See also:
ucnv_open , ucnv_openU , ucnv_close , ucnv_getCCSID , ucnv_getPlatform , UConverterPlatform
Stable:

U_CAPI UConverter *U_EXPORT2 ucnv_openU ( const UChar * name,
UErrorCode * err )
 

Creates a Unicode converter with the names specified as unicode string.

The name should be limited to the ASCII-7 alphanumerics range. The actual name will be resolved with the alias file using a case-insensitive string comparison that ignores the delimiters '-', '_', and ' ' (dash, underscore, and space). E.g., the names "UTF8", "utf-8", and "Utf 8" are all equivalent. If NULL is passed for the converter name, it will create one with the ucnv_getDefaultName() return value.

Parameters:
name   : name of the uconv table in a zero terminated Unicode string
err   outgoing error status U_MEMORY_ALLOCATION_ERROR, TABLE_NOT_FOUND
Returns:
the created Unicode converter object, or NULL if an error occurred
See also:
ucnv_open , ucnv_openCCSID , ucnv_close , ucnv_getDefaultName
Stable:

U_CAPI void U_EXPORT2 ucnv_reset ( UConverter * converter )
 

Resets the state of a converter to the default state.

This is used in the case of an error, to restart a conversion from a known default state. It will also empty the internal output buffers.

Parameters:
converter   the Unicode converter
Stable:

U_CAPI void ucnv_resetFromUnicode ( UConverter * converter )
 

Resets the from-Unicode part of a converter state to the default state.

This is used in the case of an error to restart a conversion from Unicode to a known default state. It will also empty the internal output buffers used for the conversion from Unicode codepoints.

Parameters:
converter   the Unicode converter
Draft:

U_CAPI void ucnv_resetToUnicode ( UConverter * converter )
 

Resets the to-Unicode part of a converter state to the default state.

This is used in the case of an error to restart a conversion to Unicode to a known default state. It will also empty the internal output buffers used for the conversion to Unicode codepoints.

Parameters:
converter   the Unicode converter
Draft:

U_CAPI UConverter * ucnv_safeClone ( const UConverter * cnv,
void * stackBuffer,
int32_t * pBufferSize,
UErrorCode * status )
 

Thread safe cloning operation.

Parameters:
cnv   converter to be cloned
stackBuffer   user allocated space for the new clone. If NULL new memory will be allocated. If buffer is not large enough, new memory will be allocated. Clients can use the U_CNV_SAFECLONE_BUFFERSIZE. This will probably be enough to avoid memory allocations.
pBufferSize   pointer to size of allocated space. If *pBufferSize == 0, a sufficient size for use in cloning will be returned ('pre-flighting'). If *pBufferSize is not enough for a stack-based safe clone, new memory will be allocated.
status   to indicate whether the operation went on smoothly or there were errors. An informational status value, U_SAFECLONE_ALLOCATED_ERROR, is used if any allocations were necessary.
Returns:
pointer to the new clone
Draft:
API 1.8 freeze

U_CAPI void U_EXPORT2 ucnv_setDefaultName ( const char * name )
 

Sets the current default converter name.

Caller must own the storage for 'name' and preserve it indefinitely.

Parameters:
name   the converter name to be the default (must exist).
See also:
ucnv_getDefaultName
System:
SYSTEM API

U_CAPI void U_EXPORT2 ucnv_setFallback ( UConverter * cnv,
UBool usesFallback )
 

Sets the converter to use fallback mapping or not.

Parameters:
cnv   The converter to set the fallback mapping usage on.
usesFallback   TRUE if the user wants the converter to take advantage of the fallback mapping, FALSE otherwise.
Draft:

U_CAPI void U_EXPORT2 ucnv_setFromUCallBack ( UConverter * converter,
UConverterFromUCallback newAction,
void * newContext,
UConverterFromUCallback * oldAction,
void ** oldContext,
UErrorCode * err )
 

Changes the current callback function used by the converter when an illegal or invalid sequence is found.

Context pointers are always owned by the caller.

Parameters:
converter   the unicode converter
newAction   the new callback function
newContext   the new fromUnicode callback context pointer
oldAction   fillin: returns the old callback function pointer
oldContext   fillin: returns the old callback's private void* context
err   The error code status
See also:
ucnv_getFromUCallBack
Stable:

U_CAPI void U_EXPORT2 ucnv_setSubstChars ( UConverter * converter,
const char * subChars,
int8_t len,
UErrorCode * err )
 

Sets the substitution chars when converting from unicode to a codepage.

The substitution is specified as a string of 1-4 bytes, and may contain a NUL byte.

Parameters:
converter   the Unicode converter
subChars   the substitution character byte sequence we want set
len   the number of bytes in subChars
err   the error status code. U_INDEX_OUTOFBOUNDS_ERROR if len is bigger than the maximum number of bytes allowed in subchars
See also:
ucnv_getSubstChars
Stable:

U_CAPI void U_EXPORT2 ucnv_setToUCallBack ( UConverter * converter,
UConverterToUCallback newAction,
void * newContext,
UConverterToUCallback * oldAction,
void ** oldContext,
UErrorCode * err )
 

Changes the callback function used by the converter when an illegal or invalid sequence is found.

Context pointers are always owned by the caller.

Parameters:
converter   the unicode converter
newAction   the new callback function
newContext   the new toUnicode callback context pointer
oldAction   fillin: returns the old callback function pointer
oldContext   fillin: returns the old callback's private void* context
err   The error code status
See also:
ucnv_getToUCallBack
Stable:

U_CAPI int32_t U_EXPORT2 ucnv_toUChars ( const UConverter * converter,
UChar * target,
int32_t targetCapacity,
const char * source,
int32_t sourceSize,
UErrorCode * err )
 

Converts the source string in codepage encoding into the target string in Unicode encoding.

For example, if a JIS converter is used, the source string in JIS encoding will be converted into Unicode and placed into the provided target buffer. If any problems during conversion are encountered, the currently installed toUnicode callback will be used. Barring callbacks which may write longer sequences, the target buffer should be of size 1+(2*(sourceSize / ucnv_getMinCharSize())) because the worst case is that each source sequence is the minimum size, and that sequence produces a surrogate pair (plus the zero termination). A zero-terminator will be placed at the end of the target buffer. This function is a more convenient but less efficient version of ucnv_toUnicode.

Parameters:
converter   the Unicode converter
source   the source string in codepage encoding
target   the target string in Unicode encoding
targetCapacity   capacity of the target buffer
sourceSize   : Number of bytes in source to be transcoded
err   the error status code. U_MEMORY_ALLOCATION_ERROR will be returned if the internal process buffer cannot be allocated for transcoding. U_ILLEGAL_ARGUMENT_ERROR is returned if the converter is NULL or if the source or target string is empty. U_BUFFER_OVERFLOW_ERROR will be set if the target is full and there is still input left in the source.
Returns:
the number of UChar needed in target (including the zero terminator)
See also:
ucnv_getNextUChar , ucnv_toUnicode , ucnv_convert
Stable:

U_CAPI void U_EXPORT2 ucnv_toUnicode ( UConverter * converter,
UChar ** target,
const UChar * targetLimit,
const char ** source,
const char * sourceLimit,
int32_t * offsets,
UBool flush,
UErrorCode * err )
 

Converts a buffer of codepage bytes into an array of unicode UChars characters.

This function is optimized for converting a continuous stream of data in buffer-sized chunks, where the entire source and target does not fit in available buffers.

The source pointer is an in/out parameter. It starts out pointing where the conversion is to begin, and ends up pointing after the last byte of source consumed.

Target similarly starts out pointing at the first available UChar in the output buffer, and ends up pointing after the last UChar written to the output. It does NOT necessarily keep UChar sequences together.

The converter always attempts to consume the entire source buffer, unless (1.) the target buffer is full, or (2.) a failing error is returned from the current callback function. When a successful error status has been returned, it means that all of the source buffer has been consumed. At that point, the caller should reset the source and sourceLimit pointers to point to the next chunk.

This is a stateful conversion. Additionally, even when all source data has been consumed, some data may be in the converter's internal state. Call this function repeatedly, updating the target pointers with the next empty chunk of target in case of a U_BUFFER_OVERFLOW_ERROR, and updating the source pointers with the next chunk of source when a successful error status is returned, until there are no more chunks of source data.

Parameters:
converter   the Unicode converter
target   I/O parameter. Input : Points to the beginning of the buffer to copy UChars into. Output : points to after the last UChar copied.
targetLimit   the pointer just after the end of the target buffer
source   I/O parameter, pointer to pointer to the source codepage buffer.
sourceLimit   the pointer to the byte after the end of the source buffer
offsets   if NULL is passed, nothing will happen to it, otherwise it needs to have the same number of allocated cells as target. Will fill in offsets from target to source pointer, e.g. if offsets[3] is equal to 6, it means that target[3] was a result of transcoding source[6]. For output data carried across calls, and other data without a specific source character (such as from escape sequences or callbacks), -1 will be placed for offsets.
flush   set to TRUE if the current source buffer is the last available chunk of the source, FALSE otherwise. Note that if a failing status is returned, this function may have to be called multiple times with flush set to TRUE until the source buffer is consumed.
err   the error status. U_ILLEGAL_ARGUMENT_ERROR will be set if the converter is NULL. U_BUFFER_OVERFLOW_ERROR will be set if the target is full and there is still data to be written to the target.
See also:
ucnv_fromUChars , ucnv_convert , ucnv_getMinCharSize , ucnv_setFromUCallBack , ucnv_getNextUChar
Draft:
backslash versus Yen sign in shift-JIS
Stable:

U_CAPI UBool U_EXPORT2 ucnv_usesFallback ( const UConverter * cnv )
 

Determines if the converter uses fallback mappings or not.

Returns:
TRUE if the converter uses fallback, FALSE otherwise.
Draft:


Generated at Tue Jun 12 14:04:10 2001 for ICU 1.8.1 by doxygen1.2.3 written by Dimitri van Heesch, © 1997-2000