Main Page   Class Hierarchy   Alphabetical List   Compound List   File List   Compound Members  

ucnv_io.c

00001 /*
00002 *******************************************************************************
00003 *
00004 *   Copyright (C) 1999, International Business Machines
00005 *   Corporation and others.  All Rights Reserved.
00006 *
00007 *******************************************************************************
00008 *
00009 *
00010 *  ucnv_io.c:
00011 *  initializes global variables and defines functions pertaining to file access,
00012 *  and name resolution aspect of the library.
00013 *
00014 *   new implementation:
00015 *
00016 *   created on: 1999nov22
00017 *   created by: Markus W. Scherer
00018 *
00019 *   Use the binary cnvalias.dat (created from convrtrs.txt) to work
00020 *   with aliases for converter names.
00021 ********************************************************************************
00022 */
00023 
00024 #include "unicode/utypes.h"
00025 #include "unicode/ucnv.h"           /* This file implements ucnv_xXXX() APIs */
00026 #include "umutex.h"
00027 #include "cstring.h"
00028 #include "cmemory.h"
00029 #include "ucnv_io.h"
00030 #include "unicode/udata.h"
00031 
00032 /* Format of cnvalias.dat ------------------------------------------------------
00033  *
00034  * cnvalias.dat is a binary, memory-mappable form of convrtrs.txt .
00035  * It contains two sorted tables and a block of zero-terminated strings.
00036  * Each table is preceded by the number of table entries.
00037  *
00038  * The first table maps from aliases to converter indexes.
00039  * The converter names themselves are listed as aliases in this table.
00040  * Each entry in this table has an offset to the alias and
00041  * an index of the converter in the converter table.
00042  *
00043  * The second table lists only the converters themselves.
00044  * Each entry in this table has an offset to the converter name and
00045  * the number of aliases, including the converter itself.
00046  * A count of 1 means that there is no alias, only the converter name.
00047  *
00048  * In the block of strings after the tables, each converter name is directly
00049  * followed by its aliases. All offsets to strings are offsets from the
00050  * beginning of the data.
00051  *
00052  * More formal file data structure (data format 2.1):
00053  *
00054  * uint16_t aliasCount;
00055  * uint16_t aliasOffsets[aliasCount];
00056  * uint16_t converterIndexes[aliasCount];
00057  *
00058  * uint16_t converterCount;
00059  * struct {
00060  *     uint16_t converterOffset;
00061  *     uint16_t aliasCount;
00062  * } converters[converterCount];
00063  *
00064  * uint16_t tagCount;
00065  * uint16_t taggedAliasesOffsets[tagCount][converterCount];
00066  * char tags[] = { "Tag0\0Tag1\0..." };
00067  *
00068  * char strings[]={
00069  *     "Converter0\0Alias1\0Alias2\0...Converter1\0Converter2\0Alias0\0Alias1\0..."
00070  * };
00071  *
00072  * The code included here can read versions 2 and 2.1 of the data format.
00073  * Version 2 does not have tag information, but since the code never refers
00074  * to strings[] by its base offset, it's okay.
00075  *
00076  */
00077 
00078 #define DATA_NAME "cnvalias"
00079 #define DATA_TYPE "dat"
00080 
00081 static UDataMemory *aliasData=NULL;
00082 static const uint16_t *aliasTable=NULL;
00083 
00084 static const uint16_t *converterTable = NULL;
00085 static const uint16_t *tagTable = NULL;
00086 
00087 static UBool
00088 isAcceptable(void *context,
00089              const char *type, const char *name,
00090              const UDataInfo *pInfo) {
00091     return (UBool)(
00092         pInfo->size>=20 &&
00093         pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
00094         pInfo->charsetFamily==U_CHARSET_FAMILY &&
00095         pInfo->dataFormat[0]==0x43 &&   /* dataFormat="CvAl" */
00096         pInfo->dataFormat[1]==0x76 &&
00097         pInfo->dataFormat[2]==0x41 &&
00098         pInfo->dataFormat[3]==0x6c &&
00099         pInfo->formatVersion[0]>1);
00100 }
00101 
00102 static UBool
00103 haveAliasData(UErrorCode *pErrorCode) {
00104     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
00105         return FALSE;
00106     }
00107 
00108     /* load converter alias data from file if necessary */
00109     if(aliasData==NULL) {
00110         UDataMemory *data;
00111         UDataInfo info;
00112         const uint16_t *table=NULL;
00113 
00114         /* open the data outside the mutex block */
00115         data=udata_openChoice(NULL, DATA_TYPE, DATA_NAME, isAcceptable, NULL, pErrorCode);
00116         if(U_FAILURE(*pErrorCode)) {
00117             return FALSE;
00118         }
00119 
00120         table=(const uint16_t *)udata_getMemory(data);
00121         info.size=sizeof(UDataInfo);
00122         udata_getInfo(data, &info);
00123 
00124         /* in the mutex block, set the data for this process */
00125         umtx_lock(NULL);
00126         if(aliasData==NULL) {
00127             aliasData=data;
00128             data=NULL;
00129             aliasTable=table;
00130             table=NULL;
00131             converterTable = aliasTable + 1 + 2 * *aliasTable;
00132 
00133             if (info.formatVersion[0] > 1 && info.formatVersion[1] > 0) {
00134                 tagTable = converterTable + 1 + 2 * *converterTable;
00135             }
00136         }
00137         umtx_unlock(NULL);
00138 
00139         /* if a different thread set it first, then close the extra data */
00140         if(data!=NULL) {
00141             udata_close(data); /* NULL if it was set correctly */
00142         }
00143     }
00144 
00145     return TRUE;
00146 }
00147 
00148 static UBool
00149 isAlias(const char *alias, UErrorCode *pErrorCode) {
00150     if(alias==NULL) {
00151         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
00152         return FALSE;
00153     } else if(*alias==0) {
00154         return FALSE;
00155     } else {
00156         return TRUE;
00157     }
00158 }
00159 
00160 static int16_t getTagNumber(const char *tagname) {
00161     if (tagTable) {
00162         int16_t tag, count = (int16_t) *tagTable;
00163         const char *tags = (const char *) (tagTable + 1 + count * *converterTable);
00164 
00165 #if 0
00166 
00167         char name[100];
00168         int i;
00169 
00170         /* convert the tag name to lowercase to do case-insensitive comparisons */
00171         for(i = 0; i < sizeof(name) - 1 && *tagname; ++i) {
00172             name[i] = (char)uprv_tolower(*tagname++);
00173         }
00174         name[i] = 0;
00175 
00176 #else
00177 
00178         const char *name = tagname;
00179 
00180 #endif
00181 
00182         for (tag = 0; count--; ++tag) {
00183             if (!uprv_stricmp(name, tags)) {
00184                 return tag;
00185             }
00186             tags += strlen(tags) + 1;
00187         }
00188     }
00189 
00190     return -1;
00191 }
00192 
00210 U_CAPI int U_EXPORT2
00211 ucnv_compareNames(const char *name1, const char *name2) {
00212     int rc;
00213     unsigned char c1, c2;
00214 
00215     for (;;) {
00216         /* Ignore delimiters '-', '_', and ' ' */
00217         while ((c1 = (unsigned char)*name1) == '-'
00218                || c1 == '_' || c1 == ' ') ++name1;
00219         while ((c2 = (unsigned char)*name2) == '-'
00220                || c2 == '_' || c2 == ' ') ++name2;
00221 
00222         /* If we reach the ends of both strings then they match */
00223         if ((c1|c2)==0) {
00224             return 0;
00225         }
00226         
00227         /* Case-insensitive comparison */
00228         rc = (int)(unsigned char)uprv_tolower(c1) -
00229              (int)(unsigned char)uprv_tolower(c2);
00230         if (rc!=0) {
00231             return rc;
00232         }
00233         ++name1;
00234         ++name2;
00235     }
00236 }
00237 
00238 /*
00239  * search for an alias
00240  * return NULL or a pointer to the converter table entry
00241  */
00242 static const uint16_t *
00243 findAlias(const char *alias) {
00244     char name[100];
00245     const uint16_t *p=aliasTable;
00246     uint16_t i, start, limit;
00247 
00248     limit=*p++;
00249     if(limit==0) {
00250         /* there are no aliases */
00251         return NULL;
00252     }
00253 
00254     /* convert the alias name to lowercase to do case-insensitive comparisons */
00255     for(i=0; i<sizeof(name)-1 && *alias!=0; ++i) {
00256         name[i]=(char)uprv_tolower(*alias++);
00257     }
00258     name[i]=0;
00259 
00260     /* do a binary search for the alias */
00261     start=0;
00262     while(start<limit-1) {
00263         i=(uint16_t)((start+limit)/2);
00264         if(ucnv_compareNames(name, (const char *)aliasTable+p[i])<0) {
00265             limit=i;
00266         } else {
00267             start=i;
00268         }
00269     }
00270 
00271     /* did we really find it? */
00272     if(ucnv_compareNames(name, (const char *)aliasTable+p[start])==0) {
00273         limit=*(p-1);       /* aliasCount */
00274         p+=limit;           /* advance to the second column of the alias table */
00275         i=p[start];         /* converter index */
00276         return
00277             p+limit+        /* beginning of converter table */
00278             1+              /* skip its count */
00279             2*i;            /* go to this converter's entry and return a pointer to it */
00280     } else {
00281         return NULL;
00282     }
00283 }
00284 
00285 U_CFUNC const char *
00286 ucnv_io_getConverterName(const char *alias, UErrorCode *pErrorCode) {
00287     if(haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) {
00288         const uint16_t *p=findAlias(alias);
00289         if(p!=NULL) {
00290             return (const char *)aliasTable+*p;
00291         }
00292     }
00293     return NULL;
00294 }
00295 
00296 U_CFUNC uint16_t
00297 ucnv_io_getAliases(const char *alias, const char **aliases, UErrorCode *pErrorCode) {
00298     if(haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) {
00299         const uint16_t *p=findAlias(alias);
00300         if(p!=NULL) {
00301             *aliases=(const char *)aliasTable+*p;
00302             return *(p+1);
00303         }
00304     }
00305     return 0;
00306 }
00307 
00308 U_CFUNC const char *
00309 ucnv_io_getAlias(const char *alias, uint16_t n, UErrorCode *pErrorCode) {
00310     if(haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) {
00311         const uint16_t *p=findAlias(alias);
00312         if(p!=NULL) {
00313             uint16_t count=*(p+1);
00314             if(n<count) {
00315                 const char *aliases=(const char *)aliasTable+*p;
00316                 while(n>0) {
00317                     /* skip a name, first the canonical converter name */
00318                     aliases+=uprv_strlen(aliases)+1;
00319                     --n;
00320                 }
00321                 return aliases;
00322             }
00323         }
00324     }
00325     return NULL;
00326 }
00327 
00328 U_CFUNC uint16_t
00329 ucnv_io_countStandards(UErrorCode *pErrorCode) {
00330     if (haveAliasData(pErrorCode)) {
00331         if (!tagTable) {
00332             *pErrorCode = U_INVALID_FORMAT_ERROR;
00333             return 0;
00334         }
00335 
00336         return *tagTable;
00337     }
00338 
00339     return 0;
00340 }
00341 
00342 U_CAPI const char * U_EXPORT2
00343 ucnv_getStandard(uint16_t n, UErrorCode *pErrorCode) {
00344     if (haveAliasData(pErrorCode) && tagTable) {
00345         int16_t count = (int16_t) *tagTable;
00346         const char *tags = (const char *) (tagTable + 1 + count * *converterTable);
00347 
00348         while (n-- && count--) {
00349             tags += strlen(tags) + 1;
00350         }
00351 
00352         return count ? tags : NULL;
00353     }
00354 
00355     return NULL;
00356 }
00357 
00358 U_CFUNC const char * U_EXPORT2
00359 ucnv_getStandardName(const char *alias, const char *standard, UErrorCode *pErrorCode) {
00360     if (haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) {
00361         const uint16_t *p = findAlias(alias);
00362         if(p != NULL) {
00363             int16_t tag = getTagNumber(standard);
00364 
00365             if (tag > -1) {
00366                 uint16_t offset = tagTable[1 + tag * *converterTable + (p - converterTable) / 2];
00367                 return offset ? (const char *) aliasTable + offset : NULL;
00368             }
00369         }
00370     }
00371 
00372    return NULL;
00373 }
00374 
00375 U_CFUNC uint16_t
00376 ucnv_io_countAvailableConverters(UErrorCode *pErrorCode) {
00377     if(haveAliasData(pErrorCode)) {
00378         return *converterTable;
00379     }
00380     return 0;
00381 }
00382 
00383 U_CFUNC const char *
00384 ucnv_io_getAvailableConverter(uint16_t n, UErrorCode *pErrorCode) {
00385     if(haveAliasData(pErrorCode)) {
00386         const uint16_t *p=converterTable;
00387         if(n<*p) {
00388             return (const char *)aliasTable+p[1+2*n];
00389         }
00390     }
00391     return NULL;
00392 }
00393 
00394 U_CFUNC void
00395 ucnv_io_fillAvailableConverters(const char **aliases, UErrorCode *pErrorCode) {
00396     if(haveAliasData(pErrorCode)) {
00397         const uint16_t *p=converterTable;
00398         uint16_t count=*p++;
00399         while(count>0) {
00400             *aliases++=(const char *)aliasTable+*p;
00401             p+=2;
00402             --count;
00403         }
00404     }
00405 }
00406 
00407 U_CFUNC uint16_t
00408 ucnv_io_countAvailableAliases(UErrorCode *pErrorCode) {
00409     if(haveAliasData(pErrorCode)) {
00410         return *aliasTable;
00411     }
00412     return 0;
00413 }
00414 
#if 0
/*
 * These functions are currently unused and therefore compiled out, which
 * keeps the binary smaller and improves the code coverage numbers.
 * They are kept in the source because they may become useful again and
 * because they document the layout of the alias table.
 */

/*
 * Return the n-th alias of the alias table, or NULL if the data is not
 * available or n is not below the number of aliases.
 */
U_CFUNC const char *
ucnv_io_getAvailableAlias(uint16_t n, UErrorCode *pErrorCode) {
    if(!haveAliasData(pErrorCode) || n>=aliasTable[0]) {
        return NULL;
    }

    /* the alias offsets follow directly after the count */
    return (const char *)aliasTable+aliasTable[1+n];
}

/*
 * Store a pointer to every alias into the caller's array, which is
 * written without any bounds check.
 */
U_CFUNC void
ucnv_io_fillAvailableAliases(const char **aliases, UErrorCode *pErrorCode) {
    const uint16_t *offsets;
    uint16_t i, count;

    if(!haveAliasData(pErrorCode)) {
        return;
    }

    count=aliasTable[0];
    offsets=aliasTable+1;
    for(i=0; i<count; ++i) {
        aliases[i]=(const char *)aliasTable+offsets[i];
    }
}
#endif
00444 
00445 /* default converter name --------------------------------------------------- */
00446 
00447 /*
00448  * In order to be really thread-safe, the get function would have to take
00449  * a buffer parameter and copy the current string inside a mutex block.
00450  * This implementation only tries to be really thread-safe while
00451  * setting the name.
00452  * It assumes that setting a pointer is atomic.
00453  */
00454 
/*
 * Storage for a default converter name that was set explicitly but could
 * not be resolved as an alias; see ucnv_io_setDefaultConverterName().
 */
static char defaultConverterNameBuffer[100];

/*
 * Current default converter name. NULL means that it has not been
 * determined yet (or was reset); ucnv_io_getDefaultConverterName() then
 * derives it from uprv_getDefaultCodepage().
 */
static const char *defaultConverterName = NULL;
00457 
00458 U_CFUNC const char *
00459 ucnv_io_getDefaultConverterName() {
00460     /* local variable to be thread-safe */
00461     const char *name=defaultConverterName;
00462     if(name==NULL) {
00463         const char *codepage=0;
00464         umtx_lock(NULL);        
00465         codepage = uprv_getDefaultCodepage();
00466         umtx_unlock(NULL);
00467         if(codepage!=NULL) {
00468             UErrorCode errorCode=U_ZERO_ERROR;
00469             name=ucnv_io_getConverterName(codepage, &errorCode);
00470             if(U_FAILURE(errorCode) || name==NULL) {
00471                 name=codepage;
00472             }
00473             defaultConverterName=name;
00474         }
00475     }
00476     return name;
00477 }
00478 
00479 U_CFUNC void
00480 ucnv_io_setDefaultConverterName(const char *converterName) {
00481     if(converterName==NULL) {
00482         /* reset to the default codepage */
00483         defaultConverterName=NULL;
00484     } else {
00485         UErrorCode errorCode=U_ZERO_ERROR;
00486         const char *name=ucnv_io_getConverterName(converterName, &errorCode);
00487         if(U_SUCCESS(errorCode) && name!=NULL) {
00488             defaultConverterName=name;
00489         } else {
00490             /* do not set the name if the alias lookup failed and it is too long */
00491             int32_t length=uprv_strlen(converterName);
00492             if(length<sizeof(defaultConverterNameBuffer)) {
00493                 /* it was not found as an alias, so copy it - accept an empty name */
00494                 UBool didLock;
00495                 if(defaultConverterName==defaultConverterNameBuffer) {
00496                     umtx_lock(NULL);
00497                     didLock=TRUE;
00498                 } else {
00499                     didLock=FALSE;
00500                 }
00501                 uprv_memcpy(defaultConverterNameBuffer, converterName, length);
00502                 defaultConverterNameBuffer[length]=0;
00503                 defaultConverterName=defaultConverterNameBuffer;
00504                 if(didLock) {
00505                     umtx_unlock(NULL);
00506                 }
00507             }
00508         }
00509     }
00510 }
00511 
00512 /*
00513  * Hey, Emacs, please set the following:
00514  *
00515  * Local Variables:
00516  * indent-tabs-mode: nil
00517  * End:
00518  *
00519  */
00520 

Generated at Tue Dec 5 10:48:01 2000 for ICU by doxygen1.2.3 written by Dimitri van Heesch, © 1997-2000