00001 /* 00002 ********************************************************************** 00003 * Copyright (C) 1999, International Business Machines 00004 * Corporation and others. All Rights Reserved. 00005 ********************************************************************** 00006 * 00007 * 00008 * ucnv_bld.h: 00009 * Contains all internal and external data structure definitions 00010 * Created & Maitained by Bertrand A. Damiba 00011 * 00012 * 00013 * 00014 * ATTENTION: 00015 * --------- 00016 * Although the data structures in this file are open and stack allocatable 00017 * we reserve the right to hide them in further releases. 00018 */ 00019 00020 #ifndef UCNV_BLD_H 00021 #define UCNV_BLD_H 00022 00023 #include "unicode/utypes.h" 00024 00025 #define UCNV_MAX_SUBCHAR_LEN 4 00026 #define UCNV_ERROR_BUFFER_LENGTH 20 00027 #define UCNV_MAX_AMBIGUOUSCCSIDS 5 00028 00029 #define UCNV_IMPLEMENTED_CONVERSION_TYPES 9 00030 /*Sentinel Value used to check the integrity of the binary data files */ 00031 00032 #define UCNV_FILE_CHECK_MARKER 0xBEDA 00033 00034 /*maximum length of the converter names */ 00035 #define UCNV_MAX_CONVERTER_NAME_LENGTH 60 00036 #define UCNV_MAX_FULL_FILE_NAME_LENGTH (600+UCNV_MAX_CONVERTER_NAME_LENGTH) 00037 00038 /*Pointer to the aforementioned file */ 00039 #define UCNV_MAX_LINE_TEXT (UCNV_MAX_CONVERTER_NAME_LENGTH*400) 00040 00041 #define UCNV_SI 0x0F /*Shift in for EBDCDIC_STATEFUL and iso2022 states */ 00042 #define UCNV_SO 0x0E /*Shift out for EBDCDIC_STATEFUL and iso2022 states */ 00043 00044 typedef enum { 00045 UCNV_UNSUPPORTED_CONVERTER = -1, 00046 UCNV_SBCS = 0, 00047 UCNV_DBCS = 1, 00048 UCNV_MBCS = 2, 00049 UCNV_LATIN_1 = 3, 00050 UCNV_UTF8 = 4, 00051 UCNV_UTF16_BigEndian = 5, 00052 UCNV_UTF16_LittleEndian = 6, 00053 UCNV_EBCDIC_STATEFUL = 7, 00054 UCNV_ISO_2022 = 8, 00055 00056 UCNV_LMBCS_1 = 9, 00057 UCNV_LMBCS_2, 00058 UCNV_LMBCS_3, 00059 UCNV_LMBCS_4, 00060 UCNV_LMBCS_5, 00061 UCNV_LMBCS_6, 00062 UCNV_LMBCS_8, 00063 UCNV_LMBCS_11, 00064 UCNV_LMBCS_16, 00065 UCNV_LMBCS_17, 00066 UCNV_LMBCS_18, 00067 UCNV_LMBCS_19, 00068 UCNV_LMBCS_LAST = UCNV_LMBCS_19, 00069 00070 /* Number of converter types for which we have conversion routines. */ 00071 UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES = UCNV_LMBCS_LAST+1 00072 00073 } UConverterType; 00074 00075 /* ### move the following typedef and array into implementation files! */ 00076 typedef struct 00077 { 00078 int32_t ccsid; 00079 UChar mismapped; 00080 UChar replacement; 00081 } UAmbiguousConverter; 00082 00083 static const UAmbiguousConverter UCNV_AMBIGUOUSCONVERTERS[UCNV_MAX_AMBIGUOUSCCSIDS] = 00084 { 00085 { 943, 0x00A5, 0x005C }, 00086 { 949, 0x20A9, 0x005C }, 00087 { 1361, 0x20A9, 0x005C }, 00088 { 942, 0x00A5, 0x005C }, 00089 { 1363, 0x20A9, 0x005C } 00090 }; 00091 00092 typedef enum { 00093 UCNV_UNKNOWN = -1, 00094 UCNV_IBM = 0 00095 } UConverterPlatform; 00096 00097 U_CDECL_BEGIN /* We must declare the following as 'extern "C"' so that if ucnv 00098 itself is compiled under C++, the linkage of the funcptrs will 00099 work. 00100 */ 00101 00102 union UConverterTable; 00103 typedef union UConverterTable UConverterTable; 00104 00105 struct UConverterImpl; 00106 typedef struct UConverterImpl UConverterImpl; 00107 00108 /* ### 00109 * Markus Scherer on 2000feb04: 00110 * I have change UConverter and UConverterSharedData; there may be more changes, 00111 * or we may decide to roll back the structure definitions to what they were 00112 * before, with the additional UConverterImpl field and the new semantics for 00113 * referenceCounter. 00114 * 00115 * Reasons for changes: Attempt at performance improvements, especially 00116 * a) decrease amount of internal, implicit padding by reordering the fields 00117 * b) save space by storing the internal name of the converter only with a 00118 * pointer instead of an array 00119 * 00120 * In addition to that, I added the UConverterImpl field for better 00121 * modularizing the code and making it more maintainable. It may actually 00122 * become slightly faster by doing this. 00123 * 00124 * I changed the UConverter.to|fromUnicodeStatus to be unsigned because 00125 * the defaultValues.toUnicodeStatus is unsigned, and it seemed to be a safer choice. 00126 * 00127 * Ultimately, I would prefer not to expose these definitions any more at all, 00128 * but this is suspect to discussions, proposals and design reviews. 00129 * 00130 * I would personally like to see more information hiding (with helper APIs), 00131 * useful state fields in UConverter that are reserved for the callbacks, 00132 * and directly included structures instead of pointers to allocated 00133 * memory, like for UConverterTable and its variant fields. 00134 * 00135 * Also, with the more C++-like converter implementation, 00136 * the conversionType does not need to be in UConverterSharedData any more: 00137 * it is in UConverterImpl and hardly used. 00138 */ 00139 00140 typedef struct { 00141 uint32_t structSize; /* Size of this structure */ 00142 00143 char name [UCNV_MAX_CONVERTER_NAME_LENGTH]; /* internal name of the converter- invariant chars */ 00144 00145 int32_t codepage; /* codepage # (now IBM-$codepage) */ 00146 00147 int8_t platform; /* platform of the converter (only IBM now) */ 00148 int8_t conversionType; /* conversion type */ 00149 00150 int8_t minBytesPerChar; /* Minimum # bytes per char in this codepage */ 00151 int8_t maxBytesPerChar; /* Maximum # bytes per char in this codepage */ 00152 00153 int8_t subCharLen; 00154 00155 uint8_t subChar[UCNV_MAX_SUBCHAR_LEN]; 00156 uint8_t hasToUnicodeFallback; /* UBool needs to be changed to UBool to be consistent across platform */ 00157 uint8_t hasFromUnicodeFallback; 00158 uint8_t reserved[19]; /* to round out the structure */ 00159 00160 } UConverterStaticData; 00161 00162 /* 00163 * Defines the UConverterSharedData struct, 00164 * the immutable, shared part of UConverter. 00165 */ 00166 typedef struct { 00167 uint32_t structSize; /* Size of this structure */ 00168 uint32_t referenceCounter; /* used to count number of clients, 0xffffffff for static SharedData */ 00169 00170 const void *dataMemory; /* from udata_openChoice() */ 00171 UConverterTable *table; /* Pointer to conversion data */ 00172 00173 const UConverterStaticData *staticData; /* pointer to the static (non changing) data. */ 00174 UBool staticDataOwned; /* T if we own the staticData */ 00175 const UConverterImpl *impl; /* vtable-style struct of mostly function pointers */ 00176 00177 /*initial values of some members of the mutable part of object */ 00178 uint32_t toUnicodeStatus; 00179 } UConverterSharedData; 00180 00181 00182 /* Defines a UConverter, the lightweight mutable part the user sees */ 00183 00184 struct UConverter { 00185 uint32_t toUnicodeStatus; /* Used to internalize stream status information */ 00186 uint32_t fromUnicodeStatus; 00187 int32_t mode; 00188 UBool useFallback; 00189 00190 int8_t subCharLen; /* length of the codepage specific character sequence */ 00191 int8_t invalidCharLength; 00192 int8_t invalidUCharLength; 00193 int8_t charErrorBufferLength; /* number of valid bytes in charErrorBuffer */ 00194 int8_t UCharErrorBufferLength; /* number of valid UChars in charErrorBuffer */ 00195 00196 uint8_t subChar[UCNV_MAX_SUBCHAR_LEN]; /* codepage specific character sequence */ 00197 char invalidCharBuffer[UCNV_MAX_SUBCHAR_LEN]; 00198 uint8_t charErrorBuffer[UCNV_ERROR_BUFFER_LENGTH]; /* codepage output from Error functions */ 00199 00200 UChar invalidUCharBuffer[3]; 00201 UChar UCharErrorBuffer[UCNV_ERROR_BUFFER_LENGTH]; /* unicode output from Error functions */ 00202 00203 /* 00204 * Error function pointer called when conversion issues 00205 * occur during a T_UConverter_fromUnicode call 00206 */ 00207 void (*fromUCharErrorBehaviour) (struct UConverter *, 00208 char **, 00209 const char *, 00210 const UChar **, 00211 const UChar *, 00212 int32_t* offsets, 00213 UBool, 00214 UErrorCode *); 00215 /* 00216 * Error function pointer called when conversion issues 00217 * occur during a T_UConverter_toUnicode call 00218 */ 00219 void (*fromCharErrorBehaviour) (struct UConverter *, 00220 UChar **, 00221 const UChar *, 00222 const char **, 00223 const char *, 00224 int32_t* offsets, 00225 UBool, 00226 UErrorCode *); 00227 00228 UConverterSharedData *sharedData; /* Pointer to the shared immutable part of the converter object */ 00229 00230 /* 00231 * currently only used to point to a struct containing UConverter used by iso 2022; 00232 * could be used by clients writing their own call back function to pass context to them 00233 */ 00234 void *extraInfo; 00235 }; 00236 00237 U_CDECL_END /* end of UConverter */ 00238 00239 typedef struct UConverter UConverter; 00240 00241 00242 typedef struct 00243 { 00244 UConverter *currentConverter; 00245 uint8_t escSeq2022[10]; 00246 int8_t escSeq2022Length; 00247 } 00248 UConverterDataISO2022; 00249 00250 00251 typedef struct 00252 { 00253 UConverter *OptGrpConverter[0x20]; /* Converter per Opt. grp. */ 00254 uint8_t OptGroup; /* default Opt. grp. for this LMBCS session */ 00255 uint8_t localeConverterIndex; /* reasonable locale match for index */ 00256 00257 } 00258 UConverterDataLMBCS; 00259 00260 00261 #define CONVERTER_FILE_EXTENSION ".cnv" 00262 00263 #endif /* _UCNV_BLD */ 00264 00265 00266 00267 00268