Main Page   Class Hierarchy   Alphabetical List   Compound List   File List   Compound Members   File Members  

ucnv.h

Go to the documentation of this file.
00001 /*
00002 **********************************************************************
00003 *   Copyright (C) 1999-2001, International Business Machines
00004 *   Corporation and others.  All Rights Reserved.
00005 **********************************************************************
00006  *  ucnv.h:
00007  *  External APIs for ICU's codeset conversion library
00008  *  Bertrand A. Damiba
00009  *
00010  * Modification History:
00011  *
00012  *   Date        Name        Description
00013  *   04/04/99    helena      Fixed internal header inclusion.
00014  *   05/11/00    helena      Added setFallback and usesFallback APIs.
00015  *   06/29/2000  helena      Major rewrite of the callback APIs.
00016  *   12/07/2000  srl         Update of documentation
00017  */
00018 
00027 #ifndef UCNV_H
00028 #define UCNV_H
00029 
00030 /* Forward declaring the UConverter structure */
00031 struct UConverter;
00032 typedef struct UConverter UConverter;
00033 
00034 #include "unicode/utypes.h"
00035 #include "unicode/ucnv_err.h"
00036 
00037 U_CDECL_BEGIN
00038 
00039 /* maximum length of the converter names */
00040 #define UCNV_MAX_CONVERTER_NAME_LENGTH 60
00041 #define UCNV_MAX_FULL_FILE_NAME_LENGTH (600+UCNV_MAX_CONVERTER_NAME_LENGTH)
00042 
00043 #define  UCNV_SI 0x0F           /* Shift In (0x0F) for EBCDIC_STATEFUL and ISO 2022 states */
00044 #define  UCNV_SO 0x0E           /* Shift Out (0x0E) for EBCDIC_STATEFUL and ISO 2022 states */
00045 
00051 typedef enum {  /* Converter implementation types; ucnv_getType() returns one of these. */
00052     UCNV_UNSUPPORTED_CONVERTER = -1,  /* the only negative value in this enum */
00053     UCNV_SBCS = 0,
00054     UCNV_DBCS = 1,
00055     UCNV_MBCS = 2,
00056     UCNV_LATIN_1 = 3,
00057     UCNV_UTF8 = 4,
00058     UCNV_UTF16_BigEndian = 5,
00059     UCNV_UTF16_LittleEndian = 6,
00060     UCNV_UTF32_BigEndian = 7,
00061     UCNV_UTF32_LittleEndian = 8,
00062     UCNV_EBCDIC_STATEFUL = 9,
00063     UCNV_ISO_2022 = 10,
00064 
00065     UCNV_LMBCS_1 = 11,  /* first LMBCS variant; the following variants take consecutive values 12..22 */
00066     UCNV_LMBCS_2, 
00067     UCNV_LMBCS_3,
00068     UCNV_LMBCS_4,
00069     UCNV_LMBCS_5,
00070     UCNV_LMBCS_6,
00071     UCNV_LMBCS_8,
00072     UCNV_LMBCS_11,
00073     UCNV_LMBCS_16,
00074     UCNV_LMBCS_17,
00075     UCNV_LMBCS_18,
00076     UCNV_LMBCS_19,
00077     UCNV_LMBCS_LAST = UCNV_LMBCS_19,  /* alias of the last LMBCS variant; does not introduce a new value */
00078     UCNV_HZ,  /* numbering continues at UCNV_LMBCS_LAST + 1 */
00079     UCNV_SCSU,
00080     UCNV_US_ASCII,
00081     UCNV_UTF7,
00082 
00083     /* Number of converter types for which we have conversion routines. */
00084     UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES  /* keep last: its value is one past the highest type value above */
00085    
00086 } UConverterType;
00087 
00097 typedef enum {  /* Platform identifier: taken by ucnv_openCCSID() and returned by ucnv_getPlatform(). */
00098     UCNV_UNKNOWN = -1,
00099     UCNV_IBM = 0  /* the only non-negative platform value defined in this enum */
00100 } UConverterPlatform;
00101 
00114 typedef void (*UConverterToUCallback) (  /* Callback type set via ucnv_setToUCallBack() and read back via ucnv_getToUCallBack(). */
00115                   void* context,  /* opaque pointer; presumably the context registered together with the callback -- confirm */
00116                   UConverterToUnicodeArgs *args,
00117                   const char *codePoints,  /* NOTE(review): typed as bytes (const char *) despite the name -- confirm meaning */
00118                   int32_t length,  /* presumably the length of codePoints -- confirm */
00119                   UConverterCallbackReason reason,
00120                   UErrorCode *);  /* unnamed UErrorCode pointer */
00121 
00134 typedef void (*UConverterFromUCallback) (  /* Callback type set via ucnv_setFromUCallBack() and read back via ucnv_getFromUCallBack(). */
00135                     void* context,  /* opaque pointer; presumably the context registered together with the callback -- confirm */
00136                     UConverterFromUnicodeArgs *args,
00137                     const UChar* codeUnits,
00138                     int32_t length,  /* presumably the length of codeUnits -- confirm */
00139                     UChar32 codePoint,
00140                     UConverterCallbackReason reason,
00141                     UErrorCode *);  /* unnamed UErrorCode pointer */
00142 
00143 U_CDECL_END
00144 
00149 #define UCNV_OPTION_SEP_CHAR ','
00150 
00155 #define UCNV_OPTION_SEP_STRING ","
00156 
00161 #define UCNV_VALUE_SEP_CHAR '='
00162 
00167 #define UCNV_VALUE_SEP_STRING "="
00168 
00173 #define UCNV_LOCALE_OPTION_STRING ",locale="
00174 
00188 U_CAPI int U_EXPORT2
00189 ucnv_compareNames(const char *name1, const char *name2);
00190 
00191 
00220 U_CAPI UConverter* U_EXPORT2 
00221 ucnv_open   (const char *converterName, UErrorCode * err);
00222 
00223 
00245 U_CAPI UConverter* U_EXPORT2 
00246 ucnv_openU (const UChar * name,
00247             UErrorCode * err);
00248 
00310 U_CAPI UConverter* U_EXPORT2
00311 ucnv_openCCSID (int32_t codepage,
00312                 UConverterPlatform platform,
00313                 UErrorCode * err);
00314 
00332 U_CAPI UConverter * U_EXPORT2  /* U_EXPORT2 added to match ucnv_open()/ucnv_close(); the definition must carry it too */
00333 ucnv_safeClone(const UConverter *cnv,  /* converter to clone; not modified through this pointer (const) */
00334                void             *stackBuffer,  /* presumably caller-supplied memory for the clone, cf. U_CNV_SAFECLONE_BUFFERSIZE -- confirm */
00335                int32_t          *pBufferSize,  /* presumably the size of stackBuffer, passed by pointer -- confirm */
00336                UErrorCode       *status);
00337 
00338 #define U_CNV_SAFECLONE_BUFFERSIZE 512
00339 
00351 U_CAPI void  U_EXPORT2
00352 ucnv_close (UConverter * converter);
00353 
00369 U_CAPI void U_EXPORT2
00370 ucnv_getSubstChars (const UConverter * converter,
00371                     char *subChars,
00372                     int8_t * len,
00373                     UErrorCode * err);
00374 
00388 U_CAPI void U_EXPORT2
00389 ucnv_setSubstChars (UConverter * converter,
00390                     const char *subChars,
00391                     int8_t len,
00392                     UErrorCode * err);
00393 
00408 U_CAPI void U_EXPORT2
00409 ucnv_getInvalidChars (const UConverter * converter,
00410                       char *errBytes,
00411                       int8_t * len,
00412                       UErrorCode * err);
00413 
00428 U_CAPI void U_EXPORT2
00429 ucnv_getInvalidUChars (const UConverter * converter,
00430                        UChar *errUChars,
00431                        int8_t * len,
00432                        UErrorCode * err);
00433 
00441 U_CAPI void U_EXPORT2
00442 ucnv_reset (UConverter * converter);
00443 
00453 U_CAPI void U_EXPORT2  /* U_EXPORT2 added to match ucnv_reset(); the definition must carry it too */
00454 ucnv_resetToUnicode(UConverter *converter);
00455 
00464 U_CAPI void U_EXPORT2  /* U_EXPORT2 added to match ucnv_reset(); the definition must carry it too */
00465 ucnv_resetFromUnicode(UConverter *converter);
00466 
00475 U_CAPI int8_t U_EXPORT2
00476 ucnv_getMaxCharSize (const UConverter * converter);
00477 
00478 
00487 U_CAPI int8_t U_EXPORT2
00488 ucnv_getMinCharSize (const UConverter * converter);
00489 
00504 U_CAPI int32_t U_EXPORT2
00505 ucnv_getDisplayName (const UConverter * converter,
00506                      const char *displayLocale,
00507                      UChar * displayName,
00508                      int32_t displayNameCapacity,
00509                      UErrorCode * err);
00510 
00522 U_CAPI const char * U_EXPORT2 
00523 ucnv_getName (const UConverter * converter, UErrorCode * err);
00524 
00525 
00549 U_CAPI int32_t U_EXPORT2
00550 ucnv_getCCSID (const UConverter * converter,
00551                UErrorCode * err);
00552 
00563 U_CAPI UConverterPlatform U_EXPORT2
00564 ucnv_getPlatform (const UConverter * converter,
00565                   UErrorCode * err);
00566 
00575 U_CAPI UConverterType U_EXPORT2
00576 ucnv_getType (const UConverter * converter);
00577 
00593 U_CAPI void U_EXPORT2 ucnv_getStarters(const UConverter* converter, 
00594                                        UBool starters[256],
00595                                        UErrorCode* err);
00596 
00597 
00609 U_CAPI void U_EXPORT2
00610 ucnv_getToUCallBack (const UConverter * converter,
00611                      UConverterToUCallback *action,
00612                      void **context);
00613 
00625 U_CAPI void U_EXPORT2
00626 ucnv_getFromUCallBack (const UConverter * converter,
00627                        UConverterFromUCallback *action,
00628                        void **context);
00629 
00644 U_CAPI void U_EXPORT2
00645 ucnv_setToUCallBack (UConverter * converter,
00646                      UConverterToUCallback newAction,
00647                      void* newContext,
00648                      UConverterToUCallback *oldAction,
00649                      void** oldContext,
00650                      UErrorCode * err);
00651 
00666 U_CAPI void U_EXPORT2
00667 ucnv_setFromUCallBack (UConverter * converter,
00668                        UConverterFromUCallback newAction,
00669                        void *newContext,
00670                        UConverterFromUCallback *oldAction,
00671                        void **oldContext,
00672                        UErrorCode * err);
00673 
00727 U_CAPI void U_EXPORT2 
00728 ucnv_fromUnicode (UConverter * converter,
00729                   char **target,
00730                   const char *targetLimit,
00731                   const UChar ** source,
00732                   const UChar * sourceLimit,
00733                   int32_t* offsets,
00734                   UBool flush,
00735                   UErrorCode * err);
00736 
00737 
00793 U_CAPI void U_EXPORT2 
00794 ucnv_toUnicode (UConverter * converter,
00795                 UChar ** target,
00796                 const UChar * targetLimit,
00797                 const char **source,
00798                 const char *sourceLimit,
00799                 int32_t* offsets,
00800                 UBool flush,
00801                 UErrorCode * err);
00802 
00803 
00832 U_CAPI int32_t U_EXPORT2
00833 ucnv_fromUChars (const UConverter * converter,
00834                  char *target,
00835                  int32_t targetCapacity,
00836                  const UChar * source,
00837                  int32_t sourceLength,
00838                  UErrorCode * err);
00839 
00869 U_CAPI int32_t U_EXPORT2
00870 ucnv_toUChars (const UConverter * converter,
00871                UChar * target,
00872                int32_t targetCapacity,
00873                const char *source,
00874                int32_t sourceSize,
00875                UErrorCode * err);
00876 
00877 
00878 /**
00879  * Will convert a codepage buffer into unicode one character at a time.
00880  * <p>This function was written to be efficient when transcoding small
00881  * amounts of data at a time.
00882  * In that case it will be more efficient than \Ref{ucnv_toUnicode}.
00883  * When converting large buffers use \Ref{ucnv_toUnicode}.</p>
00884  *
00885  * <p>Handling of surrogate pairs and supplementary-plane code points:<br>
00886  * There are two different kinds of codepages that provide mappings for surrogate characters:
00887  * <ul>
00888  *   <li>Codepages like UTF-8, UTF-32, and GB 18030 provide direct representations for Unicode
00889  *       code points U+10000-U+10ffff as well as for single surrogates U+d800-U+dfff.
00890  *       Each valid sequence will result in exactly one returned code point.
00891  *       If a sequence results in a single surrogate, then that will be returned
00892  *       by itself, even if a neighboring sequence encodes the matching surrogate.</li>
00893  *   <li>Codepages like SCSU and LMBCS (and UTF-16) provide direct representations only for BMP code points
00894  *       including surrogates. Code points in supplementary planes are represented with
00895  *       two sequences, each encoding a surrogate.
00896  *       For these codepages, matching pairs of surrogates will be combined into single
00897  *       code points for returning from this function.
00898  *       (Note that SCSU is actually a mix of these codepage types.)</li>
00899  * </ul></p>
00900  *
00901  * @param converter an open UConverter
00902  * @param source the address of a pointer to the codepage buffer, will be
00903  *  updated to point after the bytes consumed in the conversion call.
00904  * @param sourceLimit points to the end of the input buffer
00905  * @param err fills in error status (see ucnv_toUnicode)
00906  * <code>U_INDEX_OUTOFBOUNDS_ERROR</code> will be set if the input 
00907  * is empty or does not convert to any output (e.g.: pure state-change 
00908  * codes SI/SO, escape sequences for ISO 2022,
00909  * or if the callback did not output anything, ...).
00910  * This function will not set a <code>U_BUFFER_OVERFLOW_ERROR</code> because
00911  *  the "buffer" is the return code. However, there might be subsequent output
00912  *  stored in the converter object
00913  * that will be returned in following calls to this function.
00914  * @return a UChar32 resulting from the partial conversion of source
00915  * @see ucnv_toUnicode
00916  * @see ucnv_toUChars
00917  * @see ucnv_convert
00918  * @stable
00919  */
00920 U_CAPI UChar32 U_EXPORT2
00921 ucnv_getNextUChar (UConverter * converter,
00922                    const char **source,
00923                    const char * sourceLimit,
00924                    UErrorCode * err);
00925 
00926 
00927 /**
00928 * Will convert a sequence of bytes from one codepage to another.
00929 * This is <STRONG>NOT AN EFFICIENT</STRONG> way to transcode.
00930 * Use \Ref{ucnv_toUnicode} and \Ref{ucnv_fromUnicode} for efficiency.
00931 * @param toConverterName The name of the converter that will be used
00932 *  in conversion from unicode into the output buffer
00933 * @param fromConverterName The name of the converter that will be used
00934 *  in conversion from the source buffer into intermediate unicode.
00935 * @param target Pointer to the output buffer
00936 * @param targetCapacity capacity of the target, in bytes
00937 * @param source Pointer to the input buffer
00938 * @param sourceLength length of the source, in bytes
00939 * @param err error status. 
00940 * <code>U_BUFFER_OVERFLOW_ERROR</code> will be set if the target is full and there is still input left in the source.
00941 * @return the number of bytes needed in target
00942 * @see ucnv_fromUnicode
00943 * @see ucnv_toUnicode
00944 * @see ucnv_fromUChars
00945 * @see ucnv_toUChars
00946 * @see ucnv_getNextUChar
00947 * @draft backslash versus Yen sign in shift-JIS
00948 */
00949 U_CAPI int32_t U_EXPORT2
00950 ucnv_convert (const char *toConverterName,
00951               const char *fromConverterName,
00952               char *target,
00953               int32_t targetCapacity,
00954               const char *source,
00955               int32_t sourceLength,
00956               UErrorCode * err);
00957 
00958 
00967 U_CAPI int32_t U_EXPORT2
00968 ucnv_flushCache (void);
00969 
00970 
00978 U_CAPI int32_t U_EXPORT2
00979 ucnv_countAvailable (void);
00980 
00989 U_CAPI const char* U_EXPORT2
00990 ucnv_getAvailableName (int32_t n);
00991 
01001 U_CAPI uint16_t U_EXPORT2  /* U_EXPORT2 added to match ucnv_countStandards(); the definition must carry it too */
01002 ucnv_countAliases(const char *alias, UErrorCode *pErrorCode);
01003 
01015 U_CAPI const char * U_EXPORT2  /* U_EXPORT2 added to match ucnv_getStandard(); the definition must carry it too */
01016 ucnv_getAlias(const char *alias, uint16_t n, UErrorCode *pErrorCode);
01017 
01030 U_CAPI void U_EXPORT2  /* U_EXPORT2 added to match the other alias/standard lookups; the definition must carry it too */
01031 ucnv_getAliases(const char *alias, const char **aliases, UErrorCode *pErrorCode);  /* aliases: caller-provided array of string pointers; required size presumably ucnv_countAliases(alias) -- confirm */
01032 
01038 U_CAPI uint16_t U_EXPORT2
01039 ucnv_countStandards(void);
01040 
01048 U_CAPI const char * U_EXPORT2
01049 ucnv_getStandard(uint16_t n, UErrorCode *pErrorCode);
01050 
01062 U_CAPI const char * U_EXPORT2
01063 ucnv_getStandardName(const char *name, const char *standard, UErrorCode *pErrorCode);
01064 
01075 U_CAPI const char * U_EXPORT2
01076 ucnv_getDefaultName (void);
01077 
01085 U_CAPI void U_EXPORT2
01086 ucnv_setDefaultName (const char *name);
01087 
01103 U_CAPI void U_EXPORT2
01104 ucnv_fixFileSeparator(const UConverter *cnv, UChar* source, int32_t sourceLen);
01105 
01113 U_CAPI UBool U_EXPORT2
01114 ucnv_isAmbiguous(const UConverter *cnv);
01115 
01123 U_CAPI void U_EXPORT2 ucnv_setFallback(UConverter *cnv, UBool usesFallback);
01124 
01130 U_CAPI UBool U_EXPORT2 ucnv_usesFallback(const UConverter *cnv);
01131 
01132 #endif
01133 /* UCNV_H */
01134 
01135 

Generated at Thu Mar 22 16:12:39 2001 for ICU 1.8 by doxygen1.2.3 written by Dimitri van Heesch, © 1997-2000