Main Page   Class Hierarchy   Compound List   File List   Header Files   Sources   Compound Members   File Members  

ucnv_bld.h

00001 /*
00002 **********************************************************************
00003 *   Copyright (C) 1999, International Business Machines
00004 *   Corporation and others.  All Rights Reserved.
00005 **********************************************************************
00006 *
00007 *
00008 *  ucnv_bld.h:
00009 *  Contains all internal and external data structure definitions
00010 * Created & Maintained by Bertrand A. Damiba
00011 *
00012 *
00013 *
00014 * ATTENTION:
00015 * ---------
00016 * Although the data structures in this file are open and stack allocatable
00017 * we reserve the right to hide them in further releases.
00018 */
00019 
00020 #ifndef UCNV_BLD_H
00021 #define UCNV_BLD_H
00022 
00023 #include "unicode/utypes.h"
00024 
00025 #define UCNV_MAX_SUBCHAR_LEN 4              /* size of the subChar[] arrays (UConverterStaticData, UConverter) and of UConverter.invalidCharBuffer */
00026 #define UCNV_ERROR_BUFFER_LENGTH 20         /* element count of UConverter.charErrorBuffer and UConverter.UCharErrorBuffer */
00027 #define UCNV_MAX_AMBIGUOUSCCSIDS 5          /* number of entries in the UCNV_AMBIGUOUSCONVERTERS table */
00028 
00029 #define UCNV_IMPLEMENTED_CONVERSION_TYPES 9 /* NOTE(review): matches the UConverterType values UCNV_SBCS (0) .. UCNV_ISO_2022 (8) only; the LMBCS types are not counted - confirm intended */
00030 /* Sentinel value used to check the integrity of the binary data files */
00031 
00032 #define UCNV_FILE_CHECK_MARKER 0xBEDA
00033 
00034 /* Maximum length of a converter name; also the size of UConverterStaticData.name */
00035 #define UCNV_MAX_CONVERTER_NAME_LENGTH 60
00036 #define UCNV_MAX_FULL_FILE_NAME_LENGTH (600+UCNV_MAX_CONVERTER_NAME_LENGTH) /* evaluates to 660 */
00037 
00038 /* Size limit for a line of text (judging by the name): 400 times the maximum converter name length, i.e. 24000 */
00039 #define UCNV_MAX_LINE_TEXT (UCNV_MAX_CONVERTER_NAME_LENGTH*400)
00040 
00041 #define  UCNV_SI 0x0F           /* Shift in for EBCDIC_STATEFUL and ISO 2022 states */
00042 #define  UCNV_SO 0x0E           /* Shift out for EBCDIC_STATEFUL and ISO 2022 states */
00043 
00044 typedef enum {                          /* Enumerates the converter (conversion) types known to this header */
00045     UCNV_UNSUPPORTED_CONVERTER = -1,    /* the only negative enumerator */
00046     UCNV_SBCS = 0,
00047     UCNV_DBCS = 1,
00048     UCNV_MBCS = 2,
00049     UCNV_LATIN_1 = 3,
00050     UCNV_UTF8 = 4,
00051     UCNV_UTF16_BigEndian = 5,
00052     UCNV_UTF16_LittleEndian = 6,
00053     UCNV_EBCDIC_STATEFUL = 7,
00054     UCNV_ISO_2022 = 8,
00055     /* LMBCS variants: enumerator values are consecutive (9..20) even though the numbers in the names skip (no 7, 9, 10, 12-15) */
00056     UCNV_LMBCS_1 = 9,
00057     UCNV_LMBCS_2, 
00058     UCNV_LMBCS_3,               
00059     UCNV_LMBCS_4,
00060     UCNV_LMBCS_5,
00061     UCNV_LMBCS_6,
00062     UCNV_LMBCS_8,
00063     UCNV_LMBCS_11,
00064     UCNV_LMBCS_16,
00065     UCNV_LMBCS_17,
00066     UCNV_LMBCS_18,
00067     UCNV_LMBCS_19,
00068     UCNV_LMBCS_LAST = UCNV_LMBCS_19,    /* alias for the highest LMBCS enumerator (value 20) */
00069 
00070     /* Number of converter types for which we have conversion routines. */
00071     UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES = UCNV_LMBCS_LAST+1 /* evaluates to 21 */
00072    /* NOTE(review): UCNV_IMPLEMENTED_CONVERSION_TYPES is separately defined as 9 in this header - confirm which count callers should use */
00073 } UConverterType;
00074 
00075 /* ### move the following typedef and array into implementation files! */
00076 typedef struct                  /* Element type of the UCNV_AMBIGUOUSCONVERTERS table */
00077 {
00078     int32_t ccsid;              /* CCSID number this entry applies to */
00079     UChar mismapped;            /* presumably the UChar that this CCSID maps ambiguously - TODO confirm against the code that reads the table */
00080     UChar replacement;          /* presumably the UChar to use in place of 'mismapped' - TODO confirm */
00081 } UAmbiguousConverter;
00082 
00083 static const UAmbiguousConverter UCNV_AMBIGUOUSCONVERTERS[UCNV_MAX_AMBIGUOUSCCSIDS] = /* 'static' in a header: every translation unit that includes it gets its own copy */
00084 {   /* columns: ccsid, mismapped, replacement; U+00A5 = YEN SIGN, U+20A9 = WON SIGN, U+005C = REVERSE SOLIDUS (backslash) */
00085     { 943, 0x00A5, 0x005C },
00086     { 949, 0x20A9, 0x005C },
00087     { 1361, 0x20A9, 0x005C },
00088     { 942, 0x00A5, 0x005C },
00089     { 1363, 0x20A9, 0x005C }
00090 };  /* five initializers, matching UCNV_MAX_AMBIGUOUSCCSIDS (5) */
00091 
00092 typedef enum {              /* Platform identifier of a converter; IBM is the only named platform here */
00093     UCNV_UNKNOWN = -1,      /* platform not known */
00094     UCNV_IBM = 0
00095 } UConverterPlatform;
00096 
00097 U_CDECL_BEGIN /* We must declare the following as 'extern "C"' so that if ucnv
00098                  itself is compiled under C++, the linkage of the funcptrs will
00099                  work.
00100               */
00101 
00102 union UConverterTable;                          /* forward declaration only; this header uses the type through a pointer (UConverterSharedData.table) */
00103 typedef union UConverterTable UConverterTable;
00104 
00105 struct UConverterImpl;                          /* forward declaration only; this header uses the type through a pointer (UConverterSharedData.impl) */
00106 typedef struct UConverterImpl UConverterImpl;
00107 
00108 /* ###
00109  * Markus Scherer on 2000feb04:
00110  * I have changed UConverter and UConverterSharedData; there may be more changes,
00111  * or we may decide to roll back the structure definitions to what they were
00112  * before, with the additional UConverterImpl field and the new semantics for
00113  * referenceCounter.
00114  *
00115  * Reasons for changes: Attempt at performance improvements, especially
00116  * a) decrease amount of internal, implicit padding by reordering the fields
00117  * b) save space by storing the internal name of the converter only with a
00118  *    pointer instead of an array
00119  *
00120  * In addition to that, I added the UConverterImpl field for better
00121  * modularizing the code and making it more maintainable. It may actually
00122  * become slightly faster by doing this.
00123  *
00124  * I changed the UConverter.to|fromUnicodeStatus to be unsigned because
00125  * the defaultValues.toUnicodeStatus is unsigned, and it seemed to be a safer choice.
00126  *
00127  * Ultimately, I would prefer not to expose these definitions any more at all,
00128  * but this is subject to discussion, proposals and design reviews.
00129  *
00130  * I would personally like to see more information hiding (with helper APIs),
00131  * useful state fields in UConverter that are reserved for the callbacks,
00132  * and directly included structures instead of pointers to allocated
00133  * memory, like for UConverterTable and its variant fields.
00134  *
00135  * Also, with the more C++-like converter implementation,
00136  * the conversionType does not need to be in UConverterSharedData any more:
00137  * it is in UConverterImpl and hardly used.
00138  */
00139 
00140 typedef struct {                    /* Static (non-changing) description of a converter; referenced through UConverterSharedData.staticData. All fields are fixed-width types. */
00141     uint32_t structSize;            /* Size of this structure */
00142     
00143     char name [UCNV_MAX_CONVERTER_NAME_LENGTH];               /* internal name of the converter - invariant chars */
00144 
00145     int32_t codepage;               /* codepage # (now IBM-$codepage) */
00146 
00147     int8_t platform;                /* platform of the converter (only IBM now); presumably holds a UConverterPlatform value - TODO confirm */
00148     int8_t conversionType;          /* conversion type; presumably holds a UConverterType value - TODO confirm */
00149 
00150     int8_t minBytesPerChar;         /* Minimum # bytes per char in this codepage */
00151     int8_t maxBytesPerChar;         /* Maximum # bytes per char in this codepage */
00152 
00153     int8_t subCharLen;              /* presumably the number of bytes used in subChar, as with UConverter.subCharLen - TODO confirm */
00154   
00155     uint8_t subChar[UCNV_MAX_SUBCHAR_LEN]; /* presumably the codepage specific substitution sequence, as with UConverter.subChar - TODO confirm */
00156     uint8_t hasToUnicodeFallback;              /* boolean flag declared as uint8_t rather than UBool; presumably so that its size is consistent across platforms - TODO confirm */
00157     uint8_t hasFromUnicodeFallback;            /* boolean flag, declared as uint8_t like hasToUnicodeFallback */
00158     uint8_t reserved[19];  /* to round out the structure */
00159 
00160 } UConverterStaticData;
00161 
00162 /*
00163  * Defines the UConverterSharedData struct,
00164  * the immutable, shared part of UConverter.
00165  */
00166 typedef struct {                    /* Immutable converter data shared between UConverter objects; each UConverter points to one via UConverter.sharedData */
00167     uint32_t structSize;            /* Size of this structure */
00168     uint32_t referenceCounter;      /* used to count number of clients, 0xffffffff for static SharedData */
00169 
00170     const void *dataMemory;         /* from udata_openChoice() */
00171     UConverterTable *table;         /* Pointer to conversion data */
00172 
00173     const UConverterStaticData *staticData; /* pointer to the static (non changing) data. */
00174     UBool                staticDataOwned; /* true if we own the staticData */
00175     const UConverterImpl *impl;     /* vtable-style struct of mostly function pointers */
00176 
00177     /* initial values of some members of the mutable part of the object (see UConverter.toUnicodeStatus) */
00178     uint32_t toUnicodeStatus;
00179 } UConverterSharedData;
00180 
00181 
00182 /* Defines a UConverter, the lightweight mutable part the user sees */
00183 
00184 struct UConverter {                     /* Mutable per-client converter state; the shared immutable part is reached through sharedData */
00185     uint32_t toUnicodeStatus;           /* Used to internalize stream status information */
00186     uint32_t fromUnicodeStatus;         /* counterpart of toUnicodeStatus for the from-Unicode direction */
00187     int32_t mode;
00188     UBool  useFallback;
00189 
00190     int8_t subCharLen;                  /* length of the codepage specific character sequence */
00191     int8_t invalidCharLength;           /* presumably the number of valid bytes in invalidCharBuffer - TODO confirm */
00192     int8_t invalidUCharLength;          /* presumably the number of valid UChars in invalidUCharBuffer - TODO confirm */
00193     int8_t charErrorBufferLength;       /* number of valid bytes in charErrorBuffer */
00194     int8_t UCharErrorBufferLength;      /* number of valid UChars in UCharErrorBuffer */
00195 
00196     uint8_t subChar[UCNV_MAX_SUBCHAR_LEN];              /* codepage specific character sequence */
00197     char invalidCharBuffer[UCNV_MAX_SUBCHAR_LEN];
00198     uint8_t charErrorBuffer[UCNV_ERROR_BUFFER_LENGTH];  /* codepage output from Error functions */
00199 
00200     UChar invalidUCharBuffer[3];
00201     UChar UCharErrorBuffer[UCNV_ERROR_BUFFER_LENGTH];   /* unicode output from Error functions */
00202 
00203     /*
00204      * Error function pointer called when conversion issues
00205      * occur during a T_UConverter_fromUnicode call
00206      */
00207     void (*fromUCharErrorBehaviour) (struct UConverter *,
00208                                      char **,
00209                                      const char *,
00210                                      const UChar **,
00211                                      const UChar *,
00212                                      int32_t* offsets,
00213                                      UBool,
00214                                      UErrorCode *);
00215     /*
00216      * Error function pointer called when conversion issues
00217      * occur during a T_UConverter_toUnicode call
00218      */
00219     void (*fromCharErrorBehaviour) (struct UConverter *,
00220                                     UChar **,
00221                                     const UChar *,
00222                                     const char **,
00223                                     const char *,
00224                                     int32_t* offsets,
00225                                     UBool,
00226                                     UErrorCode *);
00227 
00228     UConverterSharedData *sharedData;   /* Pointer to the shared immutable part of the converter object */
00229 
00230     /*
00231      * currently only used to point to a struct containing UConverter used by iso 2022;
00232      * could be used by clients writing their own call back function to pass context to them
00233      */
00234     void *extraInfo;
00235 };
00236 
00237 U_CDECL_END /* end of UConverter */
00238 
00239 typedef struct UConverter UConverter;
00240 
00241 
00242 typedef struct                      /* ISO 2022 specific converter data; presumably the struct that UConverter.extraInfo points to for ISO 2022 - TODO confirm */
00243   {
00244     UConverter *currentConverter;   /* pointer to a UConverter; presumably the converter for the currently active character set - TODO confirm */
00245     uint8_t escSeq2022[10];         /* byte buffer of 10 elements; judging by the name, holds an escape sequence */
00246     int8_t escSeq2022Length;        /* presumably the number of valid bytes in escSeq2022 - TODO confirm */
00247   }
00248 UConverterDataISO2022;
00249 
00250 
00251 typedef struct                            /* LMBCS specific converter data */
00252   {
00253     UConverter *OptGrpConverter[0x20];    /* Converter per Opt. grp. (0x20 = 32 slots) */
00254     uint8_t    OptGroup;                  /* default Opt. grp. for this LMBCS session */
00255     uint8_t    localeConverterIndex;      /* reasonable locale match for index */
00256 
00257   }
00258 UConverterDataLMBCS;
00259 
00260 
00261 #define CONVERTER_FILE_EXTENSION ".cnv"
00262 
00263 #endif /* UCNV_BLD_H */
00264 
00265 
00266 
00267 
00268 

Generated at Mon Jun 5 12:53:06 2000 for ICU1.5 by doxygen 1.0.0 written by Dimitri van Heesch, © 1997-1999