Main Page   Class Hierarchy   Alphabetical List   Compound List   File List   Compound Members   File Members  

ucnv.h

Go to the documentation of this file.
00001 /*
00002 **********************************************************************
00003 *   Copyright (C) 1999, International Business Machines
00004 *   Corporation and others.  All Rights Reserved.
00005 **********************************************************************
00006  *  ucnv.h:
00007  *  External APIs for the ICU's codeset conversion library
00008  *  Bertrand A. Damiba
00009  *
00010  * Modification History:
00011  *
00012  *   Date        Name        Description
00013  *   04/04/99    helena      Fixed internal header inclusion.
00014  *   05/11/00    helena      Added setFallback and usesFallback APIs.
00015  *   06/29/2000  helena      Major rewrite of the callback APIs.
00016  *   12/07/2000  srl         Update of documentation
00017  */
00018 
00027 #ifndef UCNV_H
00028 #define UCNV_H
00029 
00030 /* Forward declaring the UConverter structure */
00031 struct UConverter;
00032 typedef struct UConverter UConverter;
00033 
00034 #include "unicode/utypes.h"
00035 #include "unicode/ucnv_err.h"
00036 
00037 U_CDECL_BEGIN
00038 
00039 /* maximum length of the converter names */
00040 #define UCNV_MAX_CONVERTER_NAME_LENGTH 60
00041 #define UCNV_MAX_FULL_FILE_NAME_LENGTH (600+UCNV_MAX_CONVERTER_NAME_LENGTH)
00042 
00043 #define  UCNV_SI 0x0F           /* Shift In byte for EBCDIC_STATEFUL and ISO 2022 states */
00044 #define  UCNV_SO 0x0E           /* Shift Out byte for EBCDIC_STATEFUL and ISO 2022 states */
00045 
00051 typedef enum {   /* Converter type identifier; this is the return type of ucnv_getType(). */
00052     UCNV_UNSUPPORTED_CONVERTER = -1,   /* the only negative value; not counted among the supported types */
00053     UCNV_SBCS = 0,
00054     UCNV_DBCS = 1,
00055     UCNV_MBCS = 2,
00056     UCNV_LATIN_1 = 3,
00057     UCNV_UTF8 = 4,
00058     UCNV_UTF16_BigEndian = 5,
00059     UCNV_UTF16_LittleEndian = 6,
00060     UCNV_UTF32_BigEndian = 7,
00061     UCNV_UTF32_LittleEndian = 8,
00062     UCNV_EBCDIC_STATEFUL = 9,
00063     UCNV_ISO_2022 = 10,
00064 
00065     UCNV_LMBCS_1 = 11,
00066     UCNV_LMBCS_2,                /* 12 (implicit); the numeric suffix is not the enumerator value */
00067     UCNV_LMBCS_3,                /* 13 */
00068     UCNV_LMBCS_4,                /* 14 */
00069     UCNV_LMBCS_5,                /* 15 */
00070     UCNV_LMBCS_6,                /* 16 */
00071     UCNV_LMBCS_8,                /* 17 */
00072     UCNV_LMBCS_11,               /* 18 */
00073     UCNV_LMBCS_16,               /* 19 */
00074     UCNV_LMBCS_17,               /* 20 */
00075     UCNV_LMBCS_18,               /* 21 */
00076     UCNV_LMBCS_19,               /* 22 */
00077     UCNV_HZ,                     /* 23 */
00078     UCNV_LMBCS_LAST = UCNV_HZ,   /* 23; NOTE(review): despite its name this aliases UCNV_HZ, the highest enumerator - confirm the name is still intended */
00079 
00080     /* Number of converter types for which we have conversion routines. */
00081     UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES = UCNV_LMBCS_LAST+1   /* 24: one past the highest enumerator, i.e. the count of values 0..23 */
00082    
00083 } UConverterType;
00084 
00091 typedef enum {   /* Platform selector: parameter type of ucnv_openCCSID() and return type of ucnv_getPlatform(). */
00092     UCNV_UNKNOWN = -1,
00093     UCNV_IBM = 0     /* the only non-negative platform value defined in this enum */
00094 } UConverterPlatform;
00095 
00108 typedef void (*UConverterToUCallback) (   /* Callback pointer type accepted by ucnv_setToUCallBack() and reported by ucnv_getToUCallBack(). */
00109                   void* context,          /* presumably the newContext pointer registered with ucnv_setToUCallBack() - TODO confirm */
00110                   UConverterToUnicodeArgs *args,
00111                   const char *codePoints, /* read-only codepage bytes handed to the callback */
00112                   int32_t length,         /* presumably the number of bytes at codePoints - TODO confirm */
00113                   UConverterCallbackReason reason,
00114                                   UErrorCode *);   /* error-code pointer; this parameter is left unnamed in the declaration */
00115 
00128 typedef void (*UConverterFromUCallback) (   /* Callback pointer type accepted by ucnv_setFromUCallBack() and reported by ucnv_getFromUCallBack(). */
00129                     void* context,          /* presumably the newContext pointer registered with ucnv_setFromUCallBack() - TODO confirm */
00130                     UConverterFromUnicodeArgs *args,
00131                     const UChar* codeUnits, /* read-only UChar units handed to the callback */
00132                     int32_t length,         /* presumably the number of UChars at codeUnits - TODO confirm */
00133                     UChar32 codePoint,
00134                     UConverterCallbackReason reason,
00135                                     UErrorCode *);   /* error-code pointer; this parameter is left unnamed in the declaration */
00136 
00137 U_CDECL_END
00138 
00143 #define UCNV_OPTION_SEP_CHAR ','   /* option separator, as a character constant */
00144 
00149 #define UCNV_OPTION_SEP_STRING ","   /* same separator as UCNV_OPTION_SEP_CHAR, as a string literal */
00150 
00155 #define UCNV_VALUE_SEP_CHAR '='   /* value separator, as a character constant */
00156 
00161 #define UCNV_VALUE_SEP_STRING "="   /* same separator as UCNV_VALUE_SEP_CHAR, as a string literal */
00162 
00167 #define UCNV_LOCALE_OPTION_STRING ",locale="   /* option separator + "locale" + value separator, spelled out as one literal */
00168 
00182 U_CAPI int U_EXPORT2
00183 ucnv_compareNames(const char *name1, const char *name2);
00184 
00185 
00214 U_CAPI UConverter* U_EXPORT2 
00215 ucnv_open   (const char *converterName, UErrorCode * err);
00216 
00217 
00239 U_CAPI UConverter* U_EXPORT2 
00240 ucnv_openU (const UChar * name,
00241             UErrorCode * err);
00242 
00258 U_CAPI UConverter* U_EXPORT2
00259 ucnv_openCCSID (int32_t codepage,
00260                 UConverterPlatform platform,
00261                 UErrorCode * err);
00262 
00275 U_CAPI void  U_EXPORT2
00276 ucnv_close (UConverter * converter);
00277 
00293 U_CAPI void U_EXPORT2
00294 ucnv_getSubstChars (const UConverter * converter,
00295                     char *subChars,
00296                     int8_t * len,
00297                     UErrorCode * err);
00298 
00312 U_CAPI void U_EXPORT2
00313 ucnv_setSubstChars (UConverter * converter,
00314                     const char *subChars,
00315                     int8_t len,
00316                     UErrorCode * err);
00317 
00332 U_CAPI void U_EXPORT2
00333 ucnv_getInvalidChars (const UConverter * converter,
00334                       char *errBytes,
00335                       int8_t * len,
00336                       UErrorCode * err);
00337 
00352 U_CAPI void U_EXPORT2
00353 ucnv_getInvalidUChars (const UConverter * converter,
00354                        UChar *errUChars,
00355                        int8_t * len,
00356                        UErrorCode * err);
00357 
00365 U_CAPI void U_EXPORT2
00366 ucnv_reset (UConverter * converter);
00367 
00377 U_CAPI void   /* NOTE(review): declared without U_EXPORT2, unlike ucnv_reset() in this header - confirm this is intentional */
00378 ucnv_resetToUnicode(UConverter *converter);
00379 
00388 U_CAPI void   /* NOTE(review): declared without U_EXPORT2, unlike ucnv_reset() in this header - confirm this is intentional */
00389 ucnv_resetFromUnicode(UConverter *converter);
00390 
00399 U_CAPI int8_t U_EXPORT2
00400 ucnv_getMaxCharSize (const UConverter * converter);
00401 
00402 
00411 U_CAPI int8_t U_EXPORT2
00412 ucnv_getMinCharSize (const UConverter * converter);
00413 
00428 U_CAPI int32_t U_EXPORT2
00429 ucnv_getDisplayName (const UConverter * converter,
00430                      const char *displayLocale,
00431                      UChar * displayName,
00432                      int32_t displayNameCapacity,
00433                      UErrorCode * err);
00434 
00446 U_CAPI const char * U_EXPORT2 
00447 ucnv_getName (const UConverter * converter, UErrorCode * err);
00448 
00449 
00464 U_CAPI int32_t U_EXPORT2
00465 ucnv_getCCSID (const UConverter * converter,
00466                UErrorCode * err);
00467 
00478 U_CAPI UConverterPlatform U_EXPORT2
00479 ucnv_getPlatform (const UConverter * converter,
00480                   UErrorCode * err);
00481 
00490 U_CAPI UConverterType U_EXPORT2
00491 ucnv_getType (const UConverter * converter);
00492 
00508 U_CAPI void U_EXPORT2 ucnv_getStarters(const UConverter* converter, 
00509                                      UBool starters[256],
00510                                      UErrorCode* err);
00511 
00512 
00524 U_CAPI void U_EXPORT2
00525 ucnv_getToUCallBack (const UConverter * converter,
00526                      UConverterToUCallback *action,
00527                      void **context);
00528 
00540 U_CAPI void U_EXPORT2
00541 ucnv_getFromUCallBack (const UConverter * converter,
00542                        UConverterFromUCallback *action,
00543                        void **context);
00544 
00559 U_CAPI void U_EXPORT2
00560 ucnv_setToUCallBack (UConverter * converter,
00561                      UConverterToUCallback newAction,
00562                      void* newContext,
00563                      UConverterToUCallback *oldAction,
00564                      void** oldContext,
00565                      UErrorCode * err);
00566 
00581 U_CAPI void U_EXPORT2
00582 ucnv_setFromUCallBack (UConverter * converter,
00583                        UConverterFromUCallback newAction,
00584                        void *newContext,
00585                        UConverterFromUCallback *oldAction,
00586                        void **oldContext,
00587                        UErrorCode * err);
00588 
00642 U_CAPI void U_EXPORT2 
00643 ucnv_fromUnicode (UConverter * converter,
00644                   char **target,
00645                   const char *targetLimit,
00646                   const UChar ** source,
00647                   const UChar * sourceLimit,
00648                   int32_t* offsets,
00649                   UBool flush,
00650                   UErrorCode * err);
00651 
00652 
00708 U_CAPI void U_EXPORT2 
00709 ucnv_toUnicode (UConverter * converter,
00710                 UChar ** target,
00711                 const UChar * targetLimit,
00712                 const char **source,
00713                 const char *sourceLimit,
00714                 int32_t* offsets,
00715                 UBool flush,
00716                 UErrorCode * err);
00717 
00718 
00747 U_CAPI int32_t U_EXPORT2
00748 ucnv_fromUChars (const UConverter * converter,
00749                  char *target,
00750                  int32_t targetCapacity,
00751                  const UChar * source,
00752                  int32_t sourceLength,
00753                  UErrorCode * err);
00754 
00784 U_CAPI int32_t U_EXPORT2
00785 ucnv_toUChars (const UConverter * converter,
00786                UChar * target,
00787                int32_t targetCapacity,
00788                const char *source,
00789                int32_t sourceSize,
00790                UErrorCode * err);
00791 
00792 
00793 /**
00794  * Converts a codepage buffer into Unicode one character at a time.
00795  * <p>This function was written to be efficient when transcoding small
00796  * amounts of data at a time.
00797  * In that case it will be more efficient than ucnv_toUnicode().
00798  * When converting large buffers, use ucnv_toUnicode().</p>
00799  *
00800  * <p>Handling of surrogate pairs and supplementary-plane code points:<br>
00801  * There are two different kinds of codepages that provide mappings for surrogate characters:
00802  * <ul>
00803  *   <li>Codepages like UTF-8, UTF-32, and GB 18030 provide direct representations for Unicode
00804  *       code points U+10000-U+10ffff as well as for single surrogates U+d800-U+dfff.
00805  *       Each valid sequence will result in exactly one returned code point.
00806  *       If a sequence results in a single surrogate, then that will be returned
00807  *       by itself, even if a neighboring sequence encodes the matching surrogate.</li>
00808  *   <li>Codepages like SCSU and LMBCS (and UTF-16) provide direct representations only for BMP code points
00809  *       including surrogates. Code points in supplementary planes are represented with
00810  *       two sequences, each encoding a surrogate.
00811  *       For these codepages, matching pairs of surrogates will be combined into single
00812  *       code points for returning from this function.
00813  *       (Note that SCSU is actually a mix of these codepage types.)</li>
00814  * </ul></p>
00815  *
00816  * @param converter an open UConverter
00817  * @param source the address of a pointer to the codepage buffer, will be
00818  *  updated to point after the bytes consumed in the conversion call.
00819  * @param sourceLimit points to the end of the input buffer
00820  * @param err fills in error status (see ucnv_toUnicode)
00821  * <code>U_INDEX_OUTOFBOUNDS_ERROR</code> will be set if the input 
00822  * is empty or does not convert to any output (e.g. pure state-change 
00823  * codes SI/SO, escape sequences for ISO 2022,
00824  * or if the callback did not output anything, ...).
00825  * This function will not set a <code>U_BUFFER_OVERFLOW_ERROR</code> because
00826  *  the "buffer" is the return code. However, there might be subsequent output
00827  *  stored in the converter object
00828  * that will be returned in following calls to this function.
00829  * @return a UChar32 resulting from the partial conversion of source
00830  * @see ucnv_toUnicode
00831  * @see ucnv_toUChars
00832  * @see ucnv_convert
00833  * @stable
00834  */
00835 U_CAPI UChar32 U_EXPORT2
00836 ucnv_getNextUChar (UConverter * converter,
00837                    const char **source,
00838                    const char * sourceLimit,
00839                    UErrorCode * err);
00840 
00841 
00842 /**
00843 * Converts a sequence of bytes from one codepage to another.
00844 * This is <STRONG>NOT AN EFFICIENT</STRONG> way to transcode.
00845 * Use ucnv_toUnicode() and ucnv_fromUnicode() for efficiency.
00846 * @param toConverterName The name of the converter that will be used
00847 *  in conversion from unicode into the output buffer
00848 * @param fromConverterName The name of the converter that will be used
00849 *  in conversion from the source buffer into intermediate unicode.
00850 * @param target Pointer to the output buffer
00851 * @param targetCapacity capacity of the target, in bytes
00852 * @param source Pointer to the input buffer
00853 * @param sourceLength length of the source, in bytes
00854 * @param err error status. 
00855 * <code>U_BUFFER_OVERFLOW_ERROR</code> will be set if the target is full and there is still input left in the source.
00856 * @return the number of bytes needed in target
00857 * @see ucnv_fromUnicode
00858 * @see ucnv_toUnicode
00859 * @see ucnv_fromUChars
00860 * @see ucnv_toUChars
00861 * @see ucnv_getNextUChar
00862 * @draft backslash versus Yen sign in shift-JIS
00863 */
00864 U_CAPI int32_t U_EXPORT2
00865 ucnv_convert (const char *toConverterName,
00866               const char *fromConverterName,
00867               char *target,
00868               int32_t targetCapacity,
00869               const char *source,
00870               int32_t sourceLength,
00871               UErrorCode * err);
00872 
00873 
00882 U_CAPI int32_t U_EXPORT2
00883 ucnv_flushCache (void);
00884 
00885 
00893 U_CAPI int32_t U_EXPORT2
00894 ucnv_countAvailable (void);
00895 
00904 U_CAPI const char* U_EXPORT2
00905 ucnv_getAvailableName (int32_t n);
00906 
00916 U_CAPI uint16_t   /* NOTE(review): declared without U_EXPORT2, unlike ucnv_countStandards() in this header - confirm this is intentional */
00917 ucnv_countAliases(const char *alias, UErrorCode *pErrorCode);
00918 
00930 U_CAPI const char *   /* NOTE(review): declared without U_EXPORT2, unlike ucnv_getStandard() in this header - confirm this is intentional */
00931 ucnv_getAlias(const char *alias, uint16_t n, UErrorCode *pErrorCode);
00932 
00945 U_CAPI void   /* NOTE(review): declared without U_EXPORT2, unlike ucnv_setDefaultName() in this header - confirm this is intentional */
00946 ucnv_getAliases(const char *alias, const char **aliases, UErrorCode *pErrorCode);
00947 
00953 U_CAPI uint16_t U_EXPORT2
00954 ucnv_countStandards(void);
00955 
00963 U_CAPI const char * U_EXPORT2
00964 ucnv_getStandard(uint16_t n, UErrorCode *pErrorCode);
00965 
00977 U_CAPI const char * U_EXPORT2
00978 ucnv_getStandardName(const char *name, const char *standard, UErrorCode *pErrorCode);
00979 
00990 U_CAPI const char * U_EXPORT2
00991 ucnv_getDefaultName (void);
00992 
01000 U_CAPI void U_EXPORT2
01001 ucnv_setDefaultName (const char *name);
01002 
01018 U_CAPI void U_EXPORT2
01019 ucnv_fixFileSeparator(const UConverter *cnv, UChar* source, int32_t sourceLen);
01020 
01028 U_CAPI UBool U_EXPORT2
01029 ucnv_isAmbiguous(const UConverter *cnv);
01030 
01038 U_CAPI void U_EXPORT2 ucnv_setFallback(UConverter *cnv, UBool usesFallback);
01039 
01045 U_CAPI UBool U_EXPORT2 ucnv_usesFallback(const UConverter *cnv);
01046 
01047 #endif
01048 /* UCNV_H */
01049 
01050 

Generated at Fri Dec 15 12:12:36 2000 for ICU 1.7 by doxygen1.2.3 written by Dimitri van Heesch, © 1997-2000