Main Page   Class Hierarchy   Alphabetical List   Compound List   File List   Compound Members  

ucnv_bld.c

00001 /*
00002  ********************************************************************
00003  * COPYRIGHT: 
00004  * Copyright (c) 1996-1999, International Business Machines Corporation and
00005  * others. All Rights Reserved.
00006  ********************************************************************
00007  *
00008  *  ucnv_bld.c:
00009  *
00010  *  Defines functions that are used in the creation/initialization/deletion
00011  *  of converters and related structures.
00012  *  Uses the ucnv_io.h routines to access disk information.
00013  *  Is used by ucnv.h to implement the public API create/delete/flushCache routines.
00014  * Modification History:
00015  * 
00016  *   Date        Name        Description
00017  * 
00018  *   06/20/2000  helena      OS/400 port changes; mostly typecast.
00019  *   06/29/2000  helena      Major rewrite of the callback interface.
00020 */
00021 
00022 
00023 #include "ucnv_io.h"
00024 #include "uhash.h"
00025 #include "ucmp16.h"
00026 #include "ucmp8.h"
00027 #include "ucnv_bld.h"
00028 #include "unicode/ucnv_err.h"
00029 #include "ucnv_cnv.h"
00030 #include "ucnv_imp.h"
00031 #include "unicode/udata.h"
00032 #include "unicode/ucnv.h"
00033 #include "umutex.h"
00034 #include "cstring.h"
00035 #include "cmemory.h"
00036 #include "filestrm.h"
00037 
00038 #include <stdio.h>
00039 
/*
 * Tracing hook used around the shared-data cache operations in this file.
 * The first branch (disabled) forwards to an external logging function and
 * appends the current source line; the active branch expands to nothing,
 * so every UCNV_DEBUG_LOG(...) call site compiles to an empty statement.
 */
#if 0
extern void UCNV_DEBUG_LOG(char *what, char *who, void *p, int l);
#define UCNV_DEBUG_LOG(what,who,ptr) UCNV_DEBUG_LOG(what,who,ptr,__LINE__)
#else
# define UCNV_DEBUG_LOG(what,who,ptr)
#endif
00046 
00047 static const UConverterSharedData *
00048 converterData[UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES]={
00049     &_SBCSData, &_DBCSData, &_MBCSData, &_Latin1Data,
00050     &_UTF8Data, &_UTF16BEData, &_UTF16LEData, &_UTF32BEData, &_UTF32LEData,
00051     &_EBCDICStatefulData, &_ISO2022Data, 
00052     &_LMBCSData1,&_LMBCSData2, &_LMBCSData3, &_LMBCSData4, &_LMBCSData5, &_LMBCSData6,
00053     &_LMBCSData8,&_LMBCSData11,&_LMBCSData16,&_LMBCSData17,&_LMBCSData18,&_LMBCSData19,
00054     &_HZData,
00055 };
00056 
00057 static struct {
00058   const char *name;
00059   UConverterType type;
00060 } cnvNameType[] = {
00061   { "LATIN_1", UCNV_LATIN_1 },
00062   { "UTF8", UCNV_UTF8 },
00063   { "UTF16_BigEndian", UCNV_UTF16_BigEndian },
00064   { "UTF16_LittleEndian", UCNV_UTF16_LittleEndian },
00065 #if U_IS_BIG_ENDIAN
00066   { "UTF16_PlatformEndian", UCNV_UTF16_BigEndian },
00067   { "UTF16_OppositeEndian", UCNV_UTF16_LittleEndian },
00068 #else
00069   { "UTF16_PlatformEndian", UCNV_UTF16_LittleEndian },
00070   { "UTF16_OppositeEndian", UCNV_UTF16_BigEndian},
00071 #endif
00072   { "UTF32_BigEndian", UCNV_UTF32_BigEndian },
00073   { "UTF32_LittleEndian", UCNV_UTF32_LittleEndian },
00074 #if U_IS_BIG_ENDIAN
00075   { "UTF32_PlatformEndian", UCNV_UTF32_BigEndian },
00076   { "UTF32_OppositeEndian", UCNV_UTF32_LittleEndian },
00077 #else
00078   { "UTF32_PlatformEndian", UCNV_UTF32_LittleEndian },
00079   { "UTF32_OppositeEndian", UCNV_UTF32_BigEndian},
00080 #endif
00081   { "ISO_2022", UCNV_ISO_2022 },
00082   { "LMBCS-1", UCNV_LMBCS_1 },
00083   { "LMBCS-2", UCNV_LMBCS_2 },
00084   { "LMBCS-3", UCNV_LMBCS_3 },
00085   { "LMBCS-4", UCNV_LMBCS_4 },
00086   { "LMBCS-5", UCNV_LMBCS_5 },
00087   { "LMBCS-6", UCNV_LMBCS_6 },
00088   { "LMBCS-8", UCNV_LMBCS_8 },
00089   { "LMBCS-11",UCNV_LMBCS_11 },
00090   { "LMBCS-16",UCNV_LMBCS_16 },
00091   { "LMBCS-17",UCNV_LMBCS_17 },
00092   { "LMBCS-18",UCNV_LMBCS_18 },
00093   { "LMBCS-19",UCNV_LMBCS_19 },
00094   { "HZ",UCNV_HZ }
00095 };
00096 
00097 
00098 /*Takes an alias name gets an actual converter file name
00099  *goes to disk and opens it.
00100  *allocates the memory and returns a new UConverter object
00101  */
00102 static UConverterSharedData *createConverterFromFile (const char *converterName, UErrorCode * err);
00103 
00104 static const UConverterSharedData *getAlgorithmicTypeFromName (const char *realName);
00105 
00109 U_CAPI  UConverterSharedData* U_EXPORT2 ucnv_data_unFlattenClone(UDataMemory *pData, UErrorCode *status);
00110 
00111 /*initializes some global variables */
00112 UHashtable *SHARED_DATA_HASHTABLE = NULL;
00113 
#if 0
/*
 * Memory-leak checking aid (compiled out).
 * Forgets the cache without closing it, which deliberately leaks
 * the hashtable and all of its elements.
 */
U_CAPI void U_EXPORT2 ucnv_orphanAllConverters()
{
    /* will leak: hashtable + hashtable elements */
    SHARED_DATA_HASHTABLE = NULL;
}
#endif
00121 
00122 static UBool
00123 isCnvAcceptable(void *context,
00124              const char *type, const char *name,
00125              const UDataInfo *pInfo) {
00126     return (UBool)(
00127         pInfo->size>=20 &&
00128         pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
00129         pInfo->charsetFamily==U_CHARSET_FAMILY &&
00130         pInfo->sizeofUChar==U_SIZEOF_UCHAR &&
00131         pInfo->dataFormat[0]==0x63 &&   /* dataFormat="cnvt" */
00132         pInfo->dataFormat[1]==0x6e &&
00133         pInfo->dataFormat[2]==0x76 &&
00134         pInfo->dataFormat[3]==0x74 &&
00135         pInfo->formatVersion[0]==6);
00136 }
00137 
00138 #define DATA_TYPE "cnv"
00139 
00140 static UConverterSharedData *createConverterFromFile (const char *fileName, UErrorCode * err)
00141 {
00142   UDataMemory *data;
00143   UConverterSharedData *sharedData;
00144 
00145   if (err == NULL || U_FAILURE (*err)) {
00146     return NULL;
00147   }
00148 
00149   data = udata_openChoice(NULL, DATA_TYPE, fileName, isCnvAcceptable, NULL, err);
00150   if(U_FAILURE(*err))
00151     {
00152       return NULL;
00153     }
00154 
00155   sharedData = ucnv_data_unFlattenClone(data, err);
00156   if(U_FAILURE(*err))
00157     {
00158       udata_close(data);
00159       return NULL;
00160     }
00161 
00162   return sharedData;
00163 }
00164 
00165 void 
00166   copyPlatformString (char *platformString, UConverterPlatform pltfrm)
00167 {
00168   switch (pltfrm)
00169     {
00170     case UCNV_IBM:
00171       {
00172         uprv_strcpy (platformString, "ibm");
00173         break;
00174       }
00175     default:
00176       {
00177         uprv_strcpy (platformString, "");
00178         break;
00179       }
00180     };
00181 
00182   return;
00183 }
00184 
00185 /*returns a converter type from a string
00186  */
00187 static const UConverterSharedData *
00188   getAlgorithmicTypeFromName (const char *realName)
00189 {
00190   int i;
00191   for(i=0; i<sizeof(cnvNameType)/sizeof(cnvNameType[0]); ++i) {
00192     if(uprv_strcmp(realName, cnvNameType[i].name)==0) {
00193       return converterData[cnvNameType[i].type];
00194     }
00195   }
00196   return NULL;
00197 }
00198 
00199 /*Puts the shared data in the static hashtable SHARED_DATA_HASHTABLE */
00200 void   shareConverterData (UConverterSharedData * data)
00201 {
00202   UErrorCode err = U_ZERO_ERROR;
00203   /*Lazy evaluates the Hashtable itself */
00204   /*void *sanity = NULL;*/
00205 
00206   if (SHARED_DATA_HASHTABLE == NULL)
00207     {
00208       UHashtable* myHT = uhash_openSize (uhash_hashIChars, uhash_compareIChars,
00209                                          ucnv_io_countAvailableAliases(&err),
00210                                          &err);
00211       if (U_FAILURE (err)) return;
00212       umtx_lock (NULL);
00213       if (SHARED_DATA_HASHTABLE == NULL) SHARED_DATA_HASHTABLE = myHT;
00214       else uhash_close(myHT);
00215       umtx_unlock (NULL);
00216       
00217     }
00218   umtx_lock (NULL);
00219   /* ### check to see if the element is not already there! */
00220 
00221 /*
00222     sanity =   getSharedConverterData (data->staticData->name);
00223     if(sanity != NULL)
00224     {
00225         UCNV_DEBUG_LOG("put:overwrite!",data->staticData->name,sanity);
00226     }
00227     UCNV_DEBUG_LOG("put:chk",data->staticData->name,sanity);
00228 */
00229 
00230    uhash_put(SHARED_DATA_HASHTABLE,
00231     (void*) data->staticData->name, /* Okay to cast away const as long as
00232                                     keyDeleter == NULL */
00233             data,
00234             &err);
00235     UCNV_DEBUG_LOG("put",data->staticData->name,data);
00236   umtx_unlock (NULL);
00237 
00238   return;
00239 }
00240 
00241 UConverterSharedData *getSharedConverterData (const char *name)
00242 {
00243   /*special case when no Table has yet been created we return NULL */
00244   if (SHARED_DATA_HASHTABLE == NULL)    return NULL;
00245   else
00246     {
00247       UConverterSharedData *rc;
00248 
00249 umtx_lock(NULL);
00250       rc = (UConverterSharedData*)uhash_get (SHARED_DATA_HASHTABLE, name);
00251 umtx_unlock(NULL);
00252       UCNV_DEBUG_LOG("get",name,rc);
00253       return rc;
00254     }
00255 }
00256 
00257 /*frees the string of memory blocks associates with a sharedConverter
00258  *if and only if the referenceCounter == 0
00259  */
00260 UBool   deleteSharedConverterData (UConverterSharedData * deadSharedData)
00261 {
00262     if (deadSharedData->referenceCounter > 0)
00263         return FALSE;
00264     
00265     /* Note: if we have a dataMemory, then that means that all ucmp's came
00266        from udata, and their tables will go away at the end
00267        of this function. So, we need to simply dealloc the UCMP8's themselves.
00268        We're guaranteed that they do not allocate any further memory.
00269        
00270        When we have an API to simply 'init' a ucmp8, then no action at all will
00271        need to happen.   --srl 
00272 
00273        This means that the compact arrays would have to be static fields in
00274        UConverterSharedData, not pointers to allocated structures.
00275        Markus
00276     */
00277 
00278     if (deadSharedData->impl->unload != NULL) {
00279         deadSharedData->impl->unload(deadSharedData);
00280     }
00281 
00282     if(deadSharedData->dataMemory != NULL)
00283     {
00284         UDataMemory *data = (UDataMemory*)deadSharedData->dataMemory;
00285         udata_close(data);
00286     }
00287 
00288     if(deadSharedData->table != NULL)
00289     {
00290         uprv_free(deadSharedData->table);
00291     }
00292 
00293     uprv_free (deadSharedData);
00294     
00295     return TRUE;
00296 }
00297 
00298 static void
00299 parseConverterOptions(const char *inName,
00300                       char *cnvName, char *locale, uint32_t *pFlags) {
00301     char c;
00302 
00303     /* copy the converter name itself to cnvName */
00304     while((c=*inName)!=0 && c!=UCNV_OPTION_SEP_CHAR) {
00305         *cnvName++=c;
00306         ++inName;
00307     }
00308     *cnvName=0;
00309 
00310     /* parse options */
00311     if(c==UCNV_OPTION_SEP_CHAR) {
00312         ++inName;
00313         for(;;) {
00314             /* inName is behind an option separator */
00315             if(uprv_strncmp(inName, "locale=", 7)==0) {
00316                 /* do not modify locale itself in case we have multiple locale options */
00317                 char *dest=locale;
00318 
00319                 /* copy the locale option value */
00320                 inName+=7;
00321                 for(;;) {
00322                     c=*inName;
00323                     if(c!=0) {
00324                         ++inName;
00325                         if(c!=UCNV_OPTION_SEP_CHAR) {
00326                             *dest++=c;
00327                         } else {
00328                             *dest=0;
00329                             break;
00330                         }
00331                     } else {
00332                         *dest=0;
00333                         return;
00334                     }
00335                 }
00336             } else {
00337                 /* ignore any other options until we define some */
00338                 for(;;) {
00339                     if(uprv_strncmp(inName, "version=", 8)==0) {
00340                         /*copy the version option value*/
00341                         inName+=8;
00342                         c=*inName;
00343                         if(c!=0){
00344                             ++inName;
00345                             if(c!=UCNV_OPTION_SEP_CHAR){
00346                                 *pFlags = c;
00347                             }
00348                             else{
00349                                 break;
00350                             }
00351                         }
00352                        
00353                     }
00354                     else {
00355                         return;
00356                     }
00357                 }
00358             }
00359         }
00360     }
00361 }
00362 
00363 /*Logic determines if the converter is Algorithmic AND/OR cached
00364  *depending on that:
00365  * -we either go to get data from disk and cache it (Data=TRUE, Cached=False)
00366  * -Get it from a Hashtable (Data=X, Cached=TRUE)
00367  * -Call dataConverter initializer (Data=TRUE, Cached=TRUE)
00368  * -Call AlgorithmicConverter initializer (Data=FALSE, Cached=TRUE)
00369  */
00370 UConverter *
00371   createConverter (const char *converterName, UErrorCode * err)
00372 {
00373   char cnvName[100], locale[20];
00374   const char *realName;
00375   UConverter *myUConverter = NULL;
00376   UConverterSharedData *mySharedConverterData = NULL;
00377   UErrorCode internalErrorCode = U_ZERO_ERROR;
00378   uint32_t options=0;
00379   if (U_FAILURE (*err))
00380     return NULL;
00381 
00382   locale[0] = 0;
00383 
00384   /* In case "name" is NULL we want to open the default converter. */
00385   if (converterName == NULL) {
00386     realName = ucnv_io_getDefaultConverterName();
00387     if (realName == NULL) {
00388       *err = U_MISSING_RESOURCE_ERROR;
00389       return NULL;
00390     }
00391     /* the default converter name is already canonical */
00392   } else {
00393     /* separate the converter name from the options */
00394     parseConverterOptions(converterName, cnvName, locale,&options);
00395 
00396     /* get the canonical converter name */
00397     realName = ucnv_io_getConverterName(cnvName, &internalErrorCode);
00398     if (U_FAILURE(internalErrorCode) || realName == NULL) {
00399       /*
00400        * set the input name in case the converter was added
00401        * without updating the alias table, or when there is no alias table
00402        */
00403       realName = cnvName;
00404     }
00405   }
00406 
00407   /* separate the converter name from the options */
00408   if(realName != cnvName) {
00409     parseConverterOptions(realName, cnvName, locale,&options);
00410     realName = cnvName;
00411   }
00412 
00413   /* get the shared data for an algorithmic converter, if it is one */
00414   mySharedConverterData = (UConverterSharedData *)getAlgorithmicTypeFromName (realName);
00415   if (mySharedConverterData == NULL)
00416     {
00417       /* it is a data-based converter, get its shared data */
00418       mySharedConverterData = getSharedConverterData (realName);
00419       if (mySharedConverterData == NULL)
00420         {
00421           /*Not cached, we need to stream it in from file */
00422           mySharedConverterData = createConverterFromFile (realName, err);
00423           if (U_FAILURE (*err) || (mySharedConverterData == NULL))
00424             {
00425               return NULL;
00426             }
00427           else
00428             {
00429               /* share it with other library clients */
00430               shareConverterData (mySharedConverterData);
00431             }
00432         }
00433       else
00434         {
00435           /* ### this is unsafe: the shared data could have been deleted since sharing or getting it - these operations should increase the counter! */
00436           /* update the reference counter: one more client */
00437           umtx_lock (NULL);
00438           mySharedConverterData->referenceCounter++;
00439           umtx_unlock (NULL);
00440         }
00441     }
00442 
00443   /* allocate the converter */
00444   myUConverter = (UConverter *) uprv_malloc (sizeof (UConverter));
00445   if (myUConverter == NULL)
00446     {
00447       if (mySharedConverterData->referenceCounter != ~0) {
00448         umtx_lock (NULL);
00449         --mySharedConverterData->referenceCounter;
00450         umtx_unlock (NULL);
00451       }
00452       *err = U_MEMORY_ALLOCATION_ERROR;
00453       return NULL;
00454     }
00455 
00456   /* initialize the converter */
00457   uprv_memset(myUConverter, 0, sizeof(UConverter));
00458   myUConverter->sharedData = mySharedConverterData;
00459   myUConverter->mode = UCNV_SI;
00460   myUConverter->fromCharErrorBehaviour = (UConverterToUCallback) UCNV_TO_U_CALLBACK_SUBSTITUTE;
00461   myUConverter->fromUCharErrorBehaviour = (UConverterFromUCallback) UCNV_FROM_U_CALLBACK_SUBSTITUTE;
00462   myUConverter->toUnicodeStatus = myUConverter->sharedData->toUnicodeStatus;
00463   myUConverter->subCharLen = myUConverter->sharedData->staticData->subCharLen;
00464   uprv_memcpy (myUConverter->subChar, myUConverter->sharedData->staticData->subChar, myUConverter->subCharLen);
00465 
00466   if(myUConverter != NULL && myUConverter->sharedData->impl->open != NULL) {
00467     myUConverter->sharedData->impl->open(myUConverter, realName, locale,options, err);
00468     if(U_FAILURE(*err)) {
00469       ucnv_close(myUConverter);
00470       return NULL;
00471     }
00472   }
00473 
00474   return myUConverter;
00475 }
00476 
00477 UConverterSharedData* ucnv_data_unFlattenClone(UDataMemory *pData, UErrorCode *status)
00478 {
00479     /* UDataInfo info; -- necessary only if some converters have different formatVersion */
00480     const uint8_t *raw = (const uint8_t *)udata_getMemory(pData);
00481     const UConverterStaticData *source = (const UConverterStaticData *) raw;
00482     UConverterSharedData *data;
00483     UConverterType type = (UConverterType)source->conversionType;
00484 
00485     if(U_FAILURE(*status))
00486         return NULL;
00487 
00488     if( (uint16_t)type >= UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES ||
00489         converterData[type]->referenceCounter != 1 ||
00490         source->structSize != sizeof(UConverterStaticData))
00491     {
00492         *status = U_INVALID_TABLE_FORMAT;
00493         return NULL;
00494     }
00495 
00496 #if 0
00497     /* necessary only if some converters have different formatVersion; now everything is at version 5 */
00498     /* test for the format version: MBCS is at version 5, the rest still at 4 */
00499     info.size=sizeof(UDataInfo);
00500     udata_getInfo(pData, &info);
00501     if(type == UCNV_MBCS ? info.formatVersion[0] != 5 : info.formatVersion[0] != 4) {
00502         *status = U_INVALID_TABLE_FORMAT;
00503         return NULL;
00504     }
00505 #endif
00506 
00507     data = (UConverterSharedData *)uprv_malloc(sizeof(UConverterSharedData));
00508     if(data == NULL) {
00509         *status = U_MEMORY_ALLOCATION_ERROR;
00510         return NULL;
00511     }
00512 
00513     /* copy initial values from the static structure for this type */
00514     uprv_memcpy(data, converterData[type], sizeof(UConverterSharedData));
00515 
00516     /* ### it would be much more efficient if the table were a direct member, not a pointer */
00517     data->table = (UConverterTable *)uprv_malloc(sizeof(UConverterTable));
00518     if(data->table == NULL) {
00519         uprv_free(data);
00520         *status = U_MEMORY_ALLOCATION_ERROR;
00521         return NULL;
00522     }
00523     
00524     data->staticData = source;
00525 
00526     /* fill in fields from the loaded data */
00527     data->dataMemory = (void*)pData; /* for future use */
00528 
00529     if(data->impl->load != NULL) {
00530         data->impl->load(data, raw + source->structSize, status);
00531         if(U_FAILURE(*status)) {
00532             uprv_free(data);
00533             return NULL;
00534         }
00535     }
00536     return data;
00537 }

Generated at Tue Dec 5 10:48:00 2000 for ICU by doxygen1.2.3 written by Dimitri van Heesch, © 1997-2000