Main Page   Class Hierarchy   Alphabetical List   Compound List   File List   Compound Members  

ucnv.c

00001 /*
00002 *******************************************************************************
00003 *
00004 *   Copyright (C) 1998-1999, International Business Machines
00005 *   Corporation and others.  All Rights Reserved.
00006 *
00007 *******************************************************************************
00008 *
00009 *  ucnv.c:
00010 *  Implements APIs for the ICU's codeset conversion library
00011 *  mostly calls through internal functions created and maintained 
00012 *  by Bertrand A. Damiba
00013 *
00014 * Modification History:
00015 *
00016 *   Date        Name        Description
00017 *   04/04/99    helena      Fixed internal header inclusion.
00018 *   05/09/00    helena      Added implementation to handle fallback mappings.
00019 *   06/20/2000  helena      OS/400 port changes; mostly typecast.
00020 */
00021 #include "umutex.h"
00022 #include "unicode/ures.h"
00023 #include "uhash.h"
00024 #include "ucmp16.h"
00025 #include "ucmp8.h"
00026 #include "ucnv_io.h"
00027 #include "unicode/ucnv_err.h"
00028 #include "ucnv_cnv.h"
00029 #include "ucnv_imp.h"
00030 #include "unicode/ucnv.h"
00031 #include "cmemory.h"
00032 #include "cstring.h"
00033 #include "unicode/ustring.h"
00034 #include "unicode/uloc.h"
00035 #include "ucnv_bld.h"
00036 
#if 0
/* debugging for converters */
# include <stdio.h>
/*
 * Appends one line ("who what pointer@line") to a fixed log file.
 * The file is opened lazily on the first call and kept open afterwards.
 * If the file cannot be opened the call is silently skipped instead of
 * passing a NULL FILE* to fprintf/fflush.
 */
void UCNV_DEBUG_LOG(char *what, char *who, void *p, int l)
{
   static FILE *f = NULL;
   if(f==NULL)
   {
       f = fopen("c:\\UCNV_DEBUG_LOG.txt", "w");
       if(f==NULL)
       {
           /* logging is best effort: never crash the converter because of it */
           return;
       }
   }
   fprintf(f, "%-20s %-10s %p@%d\n",
        who,what,p,l);
   fflush(f);
}
/* the macro supplies the caller's line number as the fourth argument */
# define UCNV_DEBUG_LOG(x,y,z) UCNV_DEBUG_LOG(x,y,z,__LINE__)
#else
/* logging disabled: the macro expands to nothing */
# define UCNV_DEBUG_LOG(x,y,z)
#endif
00055 
/* Size (in elements) of the stack scratch buffers used while pre-flighting.
 * Parenthesized so the macro is safe inside any larger expression. */
#define CHUNK_SIZE (5*1024)
00057 
00058 /* Internal function : begin */
00059 static int32_t ucnv_getAmbiguousCCSID (const UConverter* cnv);
00060 /* Internal function : end */
00061 
00062 static void T_UConverter_fromCodepageToCodepage (UConverter * outConverter,
00063                                                  UConverter * inConverter,
00064                                                  char **target,
00065                                                  const char *targetLimit,
00066                                                  const char **source,
00067                                                  const char *sourceLimit,
00068                                                  int32_t* offsets,
00069                                                  UBool flush,
00070                                                  UErrorCode * err);
00071 
00072 
/* Returns whatever ucnv_io_getDefaultConverterName() reports as the
 * default converter name. */
const char* ucnv_getDefaultName ()
{
  const char *defaultName = ucnv_io_getDefaultConverterName();

  return defaultName;
}
00077 
/* Forwards the requested default converter name to the converter I/O layer. */
void   ucnv_setDefaultName (const char *converterName)
{
  ucnv_io_setDefaultConverterName (converterName);
  return;
}
00082 /*Calls through createConverter */
00083 UConverter* ucnv_open (const char *name,
00084                        UErrorCode * err)
00085 {
00086   if (err == NULL || U_FAILURE (*err)) {
00087     return NULL;
00088   }
00089 
00090   return createConverter (name, err);
00091 }
00092 
00093 /*Extracts the UChar* to a char* and calls through createConverter */
00094 UConverter*  ucnv_openU (const UChar * name,
00095                          UErrorCode * err)
00096 {
00097   char asciiName[UCNV_MAX_CONVERTER_NAME_LENGTH];
00098   
00099   if (U_FAILURE (*err))
00100     return NULL;
00101   if (name == NULL)
00102     return ucnv_open (NULL, err);
00103   if (u_strlen (name) > UCNV_MAX_CONVERTER_NAME_LENGTH)
00104     {
00105       *err = U_ILLEGAL_ARGUMENT_ERROR;
00106       return NULL;
00107     }
00108   return ucnv_open (u_austrcpy (asciiName, name), err);
00109 }
00110 
00111 /*Assumes a $platform-#codepage.$CONVERTER_FILE_EXTENSION scheme and calls
00112  *through createConverter*/
00113 UConverter*  ucnv_openCCSID (int32_t codepage,
00114                              UConverterPlatform platform,
00115                              UErrorCode * err)
00116 {
00117   char myName[UCNV_MAX_CONVERTER_NAME_LENGTH];
00118 
00119   if (U_FAILURE (*err))
00120     return NULL;
00121 
00122   copyPlatformString (myName, platform);
00123   uprv_strcat (myName, "-");
00124   T_CString_integerToString (myName + uprv_strlen (myName), codepage, 10);
00125 
00126 
00127   return createConverter (myName, err);
00128 }
00129 
00130 /*Decreases the reference counter in the shared immutable section of the object
00131  *and frees the mutable part*/
00132 
00133 void ucnv_close (UConverter * converter)
00134 {
00135   /* first, notify the callback functions that the converter is closed */
00136   UConverterToUnicodeArgs toUArgs = {
00137     sizeof(UConverterToUnicodeArgs),
00138     TRUE,
00139     NULL,
00140     NULL,
00141     NULL,
00142     NULL,
00143     NULL,
00144     NULL
00145   };
00146   UConverterFromUnicodeArgs fromUArgs = {
00147     sizeof(UConverterFromUnicodeArgs),
00148     TRUE,
00149     NULL,
00150     NULL,
00151     NULL,
00152     NULL,
00153     NULL,
00154     NULL
00155   };
00156   UErrorCode errorCode;
00157 
00158   if (converter == NULL)
00159   {
00160     return;
00161   }
00162 
00163   toUArgs.converter = fromUArgs.converter = converter;
00164   errorCode = U_ZERO_ERROR;
00165   converter->fromCharErrorBehaviour(converter->toUContext, &toUArgs, NULL, 0, UCNV_CLOSE, &errorCode);
00166   errorCode = U_ZERO_ERROR;
00167   converter->fromUCharErrorBehaviour(converter->fromUContext, &fromUArgs, NULL, 0, 0, UCNV_CLOSE, &errorCode);
00168 
00169   if (converter->sharedData->impl->close != NULL) {
00170     converter->sharedData->impl->close(converter);
00171   }
00172 
00173   if (converter->sharedData->referenceCounter != ~0) {
00174     umtx_lock (NULL);
00175     if (converter->sharedData->referenceCounter != 0) {
00176       converter->sharedData->referenceCounter--;
00177     }
00178     umtx_unlock (NULL);
00179   }
00180   uprv_free (converter);
00181 
00182   return;
00183 }
00184 
00185 /*Frees all shared immutable objects that aren't referred to (reference count = 0)
00186  */
00187 int32_t  ucnv_flushCache ()
00188 {
00189   UConverterSharedData *mySharedData = NULL;
00190   int32_t pos = -1;
00191   int32_t tableDeletedNum = 0;
00192   const UHashElement *e;
00193 
00194   /*if shared data hasn't even been lazy evaluated yet
00195    * return 0
00196    */
00197   if (SHARED_DATA_HASHTABLE == NULL)
00198     return 0;
00199 
00200   /*creates an enumeration to iterate through every element in the
00201    *table
00202    */
00203   umtx_lock (NULL);
00204   while ((e = uhash_nextElement (SHARED_DATA_HASHTABLE, &pos)) != NULL)
00205     {
00206       mySharedData = (UConverterSharedData *) e->value;
00207       /*deletes only if reference counter == 0 */
00208       if (mySharedData->referenceCounter == 0)
00209         {
00210           tableDeletedNum++;
00211 
00212           UCNV_DEBUG_LOG("del",mySharedData->staticData->name,mySharedData);
00213 
00214           uhash_removeElement(SHARED_DATA_HASHTABLE, e);
00215           deleteSharedConverterData (mySharedData);
00216         }
00217     }
00218   umtx_unlock (NULL);
00219 
00220   return tableDeletedNum;
00221 }
00222 
00223 /*returns a single Name from the list, will return NULL if out of bounds
00224  */
00225 const char*  ucnv_getAvailableName (int32_t n)
00226 {
00227   if (0 <= n && n <= 0xffff) {
00228     UErrorCode err = U_ZERO_ERROR;
00229     const char *name = ucnv_io_getAvailableConverter((uint16_t)n, &err);
00230     if (U_SUCCESS(err)) {
00231       return name;
00232     }
00233   }
00234   return NULL;
00235 }
00236 
00237 int32_t  ucnv_countAvailable ()
00238 {
00239   UErrorCode err = U_ZERO_ERROR;
00240   return ucnv_io_countAvailableConverters(&err);
00241 }
00242 
00243 U_CAPI uint16_t
00244 ucnv_countAliases(const char *alias, UErrorCode *pErrorCode) {
00245     const char *p;
00246     return ucnv_io_getAliases(alias, &p, pErrorCode);
00247 }
00248 
00249 
00250 U_CAPI const char *
00251 ucnv_getAlias(const char *alias, uint16_t n, UErrorCode *pErrorCode) {
00252     return ucnv_io_getAlias(alias, n, pErrorCode);
00253 }
00254 
00255 U_CAPI void
00256 ucnv_getAliases(const char *alias, const char **aliases, UErrorCode *pErrorCode) {
00257     const char *p;
00258     uint16_t count=ucnv_io_getAliases(alias, &p, pErrorCode);
00259     while(count>0) {
00260         *aliases++=p;
00261         /* skip a name, first the canonical converter name */
00262         p+=uprv_strlen(p)+1;
00263         --count;
00264     }
00265 }
00266 
00267 U_CAPI uint16_t
00268 ucnv_countStandards(void) {
00269     UErrorCode err = U_ZERO_ERROR;
00270     return ucnv_io_countStandards(&err);
00271 }
00272 
00273 void   ucnv_getSubstChars (const UConverter * converter,
00274                            char *mySubChar,
00275                            int8_t * len,
00276                            UErrorCode * err)
00277 {
00278   if (U_FAILURE (*err))
00279     return;
00280 
00281   if (*len < converter->subCharLen)     /*not enough space in subChars */
00282     {
00283       *err = U_INDEX_OUTOFBOUNDS_ERROR;
00284       return;
00285     }
00286 
00287   uprv_memcpy (mySubChar, converter->subChar, converter->subCharLen);   /*fills in the subchars */
00288   *len = converter->subCharLen; /*store # of bytes copied to buffer */
00289 
00290   return;
00291 }
00292 
00293 void   ucnv_setSubstChars (UConverter * converter,
00294                            const char *mySubChar,
00295                            int8_t len,
00296                            UErrorCode * err)
00297 {
00298   if (U_FAILURE (*err))
00299     return;
00300 
00301   /*Makes sure that the subChar is within the codepages char length boundaries */
00302   if ((len > converter->sharedData->staticData->maxBytesPerChar)
00303       || (len < converter->sharedData->staticData->minBytesPerChar))
00304     {
00305       *err = U_ILLEGAL_ARGUMENT_ERROR;
00306       return;
00307     }
00308 
00309   uprv_memcpy (converter->subChar, mySubChar, len);     /*copies the subchars */
00310   converter->subCharLen = len;  /*sets the new len */
00311 
00312   return;
00313 }
00314 
00315 
00316 
00317 
00318 int32_t  ucnv_getDisplayName (const UConverter * converter,
00319                               const char *displayLocale,
00320                               UChar * displayName,
00321                               int32_t displayNameCapacity,
00322                               UErrorCode * err)
00323 {
00324   UChar stringToWriteBuffer[UCNV_MAX_CONVERTER_NAME_LENGTH];
00325   UChar const *stringToWrite;
00326   int32_t stringToWriteLength;
00327   UResourceBundle *rb = NULL;
00328 
00329   if (U_FAILURE (*err))
00330     return 0;
00331 
00332   /*create an RB, init the fill-in string, gets it from the RB */
00333   rb = ures_open (NULL, displayLocale, err);
00334 
00335   stringToWrite = ures_getStringByKey(rb,
00336                 converter->sharedData->staticData->name,
00337                 &stringToWriteLength,
00338                 err);
00339   if (rb)
00340     ures_close (rb);
00341 
00342   if(U_FAILURE(*err))
00343     {
00344       /*Error While creating or getting resource from the resource bundle
00345        *use the internal name instead
00346        *
00347        *sets stringToWriteLength (which accounts for a NULL terminator)
00348        *and stringToWrite
00349        */
00350       stringToWriteLength = uprv_strlen (converter->sharedData->staticData->name) + 1;
00351       stringToWrite = u_uastrcpy (stringToWriteBuffer, converter->sharedData->staticData->name);
00352 
00353       /*Hides the fallback to the internal name from the user */
00354       if (*err == U_MISSING_RESOURCE_ERROR)
00355         *err = U_ZERO_ERROR;
00356     }
00357 
00358   /*At this point we have a displayName and its length
00359    *we want to see if it fits in the user provided params
00360    */
00361 
00362   if (stringToWriteLength <= displayNameCapacity)
00363     {
00364       /*it fits */
00365       u_strcpy (displayName, stringToWrite);
00366     }
00367   else
00368     {
00369       /*it doesn't fit */
00370       *err = U_BUFFER_OVERFLOW_ERROR;
00371 
00372       u_strncpy (displayName, stringToWrite, displayNameCapacity);
00373       /*Zero terminates the string */
00374       if (displayNameCapacity > 0)
00375         displayName[displayNameCapacity - 1] = 0x0000;
00376     }
00377 
00378   /*if the user provided us with a with an outputLength
00379    *buffer we'll store in it the theoretical size of the
00380    *displayString
00381    */
00382   return stringToWriteLength;
00383 }
00384 
00385 
00386 /*resets the internal states of a converter
00387  *goal : have the same behaviour than a freshly created converter
00388  */
00389 void  ucnv_reset (UConverter * converter)
00390 {
00391   /* first, notify the callback functions that the converter is reset */
00392   UConverterToUnicodeArgs toUArgs = {
00393     sizeof(UConverterToUnicodeArgs),
00394     TRUE,
00395     NULL,
00396     NULL,
00397     NULL,
00398     NULL,
00399     NULL,
00400     NULL
00401   };
00402   UConverterFromUnicodeArgs fromUArgs = {
00403     sizeof(UConverterFromUnicodeArgs),
00404     TRUE,
00405     NULL,
00406     NULL,
00407     NULL,
00408     NULL,
00409     NULL,
00410     NULL
00411   };
00412   UErrorCode errorCode;
00413 
00414   if(converter == NULL) {
00415     return;
00416   }
00417   toUArgs.converter = fromUArgs.converter = converter;
00418   errorCode = U_ZERO_ERROR;
00419   converter->fromCharErrorBehaviour(converter->toUContext, &toUArgs, NULL, 0, UCNV_RESET, &errorCode);
00420   errorCode = U_ZERO_ERROR;
00421   converter->fromUCharErrorBehaviour(converter->fromUContext, &fromUArgs, NULL, 0, 0, UCNV_RESET, &errorCode);
00422 
00423   /* now reset the converter itself */
00424   converter->toUnicodeStatus = converter->sharedData->toUnicodeStatus;
00425   converter->fromUnicodeStatus = 0;
00426   converter->UCharErrorBufferLength = 0;
00427   converter->charErrorBufferLength = 0;
00428   if (converter->sharedData->impl->reset != NULL) {
00429     /* call the custom reset function */
00430     converter->sharedData->impl->reset(converter);
00431   } else {
00432     converter->mode = UCNV_SI;
00433   }
00434 }
00435 
00436 void ucnv_resetToUnicode(UConverter *converter)
00437 {
00438 #if 0
00439   UConverterToUnicodeArgs toUArgs = {
00440     sizeof(UConverterToUnicodeArgs),
00441     TRUE,
00442     NULL,
00443     NULL,
00444     NULL,
00445     NULL,
00446     NULL,
00447     NULL
00448   };
00449   UErrorCode errorCode = U_ZERO_ERROR;
00450 
00451   if(converter == NULL) {
00452     return;
00453   }
00454 
00455   toUArgs.converter = converter;
00456   converter->fromCharErrorBehaviour(converter->toUContext, &toUArgs, NULL, 0, UCNV_RESET, &errorCode);
00457 
00458   /* now reset the converter itself */
00459   converter->toUnicodeStatus = converter->sharedData->toUnicodeStatus;
00460   converter->fromUnicodeStatus = 0;
00461   converter->UCharErrorBufferLength = 0;
00462   converter->charErrorBufferLength = 0;
00463 
00464   /* Todo: Needs rest of implementation */
00465 #endif
00466 }
00467 
00468 void ucnv_resetFromUnicode(UConverter *converter)
00469 {
00470 #if 0
00471   UConverterFromUnicodeArgs fromUArgs = {
00472     sizeof(UConverterFromUnicodeArgs),
00473     TRUE,
00474     NULL,
00475     NULL,
00476     NULL,
00477     NULL,
00478     NULL,
00479     NULL
00480   };
00481   UErrorCode errorCode = U_ZERO_ERROR;
00482 
00483   if(converter == NULL) {
00484     return;
00485   }
00486 
00487   fromUArgs.converter = converter;
00488   converter->fromUCharErrorBehaviour(converter->fromUContext, &fromUArgs, NULL, 0, 0, UCNV_RESET, &errorCode);
00489 
00490   /* now reset the converter itself */
00491   converter->toUnicodeStatus = converter->sharedData->toUnicodeStatus;
00492   converter->fromUnicodeStatus = 0;
00493   converter->UCharErrorBufferLength = 0;
00494   converter->charErrorBufferLength = 0;
00495 
00496   /* Todo: Needs rest of implementation */
00497 #endif
00498 }
00499 
00500 int8_t  ucnv_getMaxCharSize (const UConverter * converter)
00501 {
00502   return converter->sharedData->staticData->maxBytesPerChar;
00503 }
00504 
00505 
00506 int8_t  ucnv_getMinCharSize (const UConverter * converter)
00507 {
00508   return converter->sharedData->staticData->minBytesPerChar;
00509 }
00510 
00511 const char*  ucnv_getName (const UConverter * converter, UErrorCode * err)
00512      
00513 {
00514   if (U_FAILURE (*err))
00515     return NULL;
00516   if(converter->sharedData->impl->getName){
00517       const char* temp= converter->sharedData->impl->getName(converter);
00518       if(temp)
00519           return temp;
00520   }
00521   return converter->sharedData->staticData->name;
00522 }
00523 
00524 int32_t  ucnv_getCCSID (const UConverter * converter,
00525                         UErrorCode * err)
00526 {
00527   if (U_FAILURE (*err))
00528     return -1;
00529 
00530   return converter->sharedData->staticData->codepage;
00531 }
00532 
00533 
00534 UConverterPlatform  ucnv_getPlatform (const UConverter * converter,
00535                                       UErrorCode * err)
00536 {
00537   if (U_FAILURE (*err))
00538     return UCNV_UNKNOWN;
00539   
00540   return (UConverterPlatform)converter->sharedData->staticData->platform;
00541 }
00542 
00543 U_CAPI void U_EXPORT2
00544     ucnv_getToUCallBack (const UConverter * converter,
00545                          UConverterToUCallback *action,
00546                          void **context)
00547 {
00548   *action = converter->fromCharErrorBehaviour;
00549   *context = converter->toUContext;
00550 }
00551 
00552 U_CAPI void U_EXPORT2
00553     ucnv_getFromUCallBack (const UConverter * converter,
00554                            UConverterFromUCallback *action,
00555                            void **context)
00556 {
00557   *action = converter->fromUCharErrorBehaviour;
00558   *context = converter->fromUContext;
00559 }
00560 
00561 void   ucnv_setToUCallBack (UConverter * converter,
00562                             UConverterToUCallback newAction,
00563                             void* newContext,
00564                             UConverterToUCallback *oldAction,
00565                             void** oldContext,
00566                             UErrorCode * err)
00567 {
00568   if (U_FAILURE (*err))
00569     return;
00570   *oldAction = converter->fromCharErrorBehaviour;
00571   converter->fromCharErrorBehaviour = newAction;
00572   *oldContext = converter->toUContext;
00573   converter->toUContext = newContext;
00574 }
00575 
00576 void ucnv_setFromUCallBack (UConverter * converter,
00577                             UConverterFromUCallback newAction,
00578                             void* newContext,
00579                             UConverterFromUCallback *oldAction,
00580                             void** oldContext,
00581                             UErrorCode * err)
00582 {
00583   
00584   if (U_FAILURE (*err))
00585     return;
00586   *oldAction = converter->fromUCharErrorBehaviour;
00587   converter->fromUCharErrorBehaviour = newAction;
00588   *oldContext = converter->fromUContext;
00589   converter->fromUContext = newContext;
00590 }
00591 
00592 void ucnv_fromUnicode (UConverter * _this,
00593                        char **target,
00594                        const char *targetLimit,
00595                        const UChar ** source,
00596                        const UChar * sourceLimit,
00597                        int32_t* offsets,
00598                        UBool flush,
00599                        UErrorCode * err)
00600 {
00601   UConverterFromUnicodeArgs args;
00602   /*
00603    * Check parameters in for all conversions
00604    */
00605   if (U_FAILURE (*err))   return;
00606   if ((_this == NULL) || ((char *) targetLimit < *target) || (sourceLimit < *source))
00607     {
00608       *err = U_ILLEGAL_ARGUMENT_ERROR;
00609       return;
00610     }
00611   
00612 
00613   /*
00614    * Deal with stored carry over data.  This is done in the common location
00615    * to avoid doing it for each conversion.
00616    */
00617   if (_this->charErrorBufferLength > 0)
00618     {
00619       int32_t myTargetIndex = 0;
00620 
00621       flushInternalCharBuffer (_this, 
00622                                (char *) *target,
00623                                &myTargetIndex,
00624                                targetLimit - *target,
00625                                offsets?&offsets:NULL,
00626                                err);
00627       *target += myTargetIndex;
00628       if (U_FAILURE (*err)) return;
00629     }
00630 
00631   args.converter = _this;
00632   args.flush = flush;
00633   args.offsets = offsets;
00634   args.source = *source;
00635   args.sourceLimit = sourceLimit;
00636   args.target = *target;
00637   args.targetLimit = targetLimit;
00638   args.size = sizeof(args);
00639   if (offsets) {
00640     if (_this->sharedData->impl->fromUnicodeWithOffsets != NULL) {
00641        _this->sharedData->impl->fromUnicodeWithOffsets(&args, err);
00642        *source = args.source;
00643        *target = args.target;
00644        return;
00645     } else {
00646       /* all code points are of the same length */
00647       int32_t targetSize = targetLimit - *target;
00648       int32_t i, bytesPerChar = _this->sharedData->staticData->maxBytesPerChar;
00649 
00650       if(bytesPerChar == 1) {
00651         for (i=0; i<targetSize; i++) {
00652           args.offsets[i] = i;
00653         }
00654       } else if(bytesPerChar == 2) {
00655         for (i=0; i<targetSize; i++) {
00656           args.offsets[i] = i>>1;
00657         }
00658       } else {
00659         int32_t j = 0, k = bytesPerChar;
00660 
00661         for (i=0; i<targetSize; i++) {
00662           /* offsets[i] = i/bytesPerChar; -- without division */
00663           args.offsets[i] = j;
00664           if(--k == 0) {
00665             k = bytesPerChar;
00666             ++j;
00667           }
00668         }
00669       }
00670     }
00671   }
00672 
00673   /*calls the specific conversion routines */
00674   _this->sharedData->impl->fromUnicode(&args, err);
00675   *source = args.source;
00676   *target = args.target;
00677   return;
00678 }
00679 
00680 
00681 
00682 void   ucnv_toUnicode (UConverter * _this,
00683                        UChar ** target,
00684                        const UChar * targetLimit,
00685                        const char **source,
00686                        const char *sourceLimit,
00687                        int32_t* offsets,
00688                        UBool flush,
00689                        UErrorCode * err)
00690 {
00691   UConverterToUnicodeArgs args;
00692   /*
00693    * Check parameters in for all conversions
00694    */
00695   if (U_FAILURE (*err))   return;
00696   if ((_this == NULL) || ((UChar *) targetLimit < *target) || (sourceLimit < *source))
00697     {
00698       *err = U_ILLEGAL_ARGUMENT_ERROR;
00699       return;
00700     }
00701 
00702   /*
00703    * Deal with stored carry over data.  This is done in the common location
00704    * to avoid doing it for each conversion.
00705    */
00706   if (_this->UCharErrorBufferLength > 0)
00707     {
00708       int32_t myTargetIndex = 0;
00709 
00710       flushInternalUnicodeBuffer (_this, 
00711                                   *target,
00712                                   &myTargetIndex,
00713                                   targetLimit - *target,
00714                                   offsets?&offsets:NULL,
00715                                   err);
00716       *target += myTargetIndex;
00717       if (U_FAILURE (*err))
00718         return;
00719     }
00720 
00721   args.converter = _this;
00722   args.flush = flush;
00723   args.offsets = offsets;
00724   args.source = (char *) *source;
00725   args.sourceLimit = sourceLimit;
00726   args.target =  *target;
00727   args.targetLimit = targetLimit;
00728   args.size = sizeof(args);
00729   if (offsets) {
00730     if (_this->sharedData->impl->toUnicodeWithOffsets != NULL) {
00731       _this->sharedData->impl->toUnicodeWithOffsets(&args, err);
00732       *source = args.source;
00733       *target = args.target;
00734       return;
00735     } else {
00736       /* all code points are of the same length */
00737       int32_t targetSize = targetLimit - *target;
00738       int32_t i, bytesPerChar = _this->sharedData->staticData->maxBytesPerChar;
00739 
00740       if(bytesPerChar == 1) {
00741         for (i=0; i<targetSize; i++) {
00742           offsets[i] = i;
00743         }
00744       } else if(bytesPerChar == 2) {
00745         for (i=0; i<targetSize; i++) {
00746           offsets[i] = i<<1;
00747         }
00748       } else {
00749         for (i=0; i<targetSize; i++) {
00750           offsets[i] = i*bytesPerChar;
00751         }
00752       }
00753     }
00754   }
00755 
00756   /*calls the specific conversion routines */
00757   _this->sharedData->impl->toUnicode(&args, err); 
00758 
00759   *source = args.source;
00760   *target = args.target;
00761   return;
00762 }
00763 
00764 int32_t   ucnv_fromUChars (const UConverter * converter,
00765                            char *target,
00766                            int32_t targetSize,
00767                            const UChar * source,
00768                            int32_t sourceSize,
00769                            UErrorCode * err)
00770 {
00771   const UChar *mySource_limit;
00772   int32_t mySourceLength = sourceSize;
00773   UConverter myConverter;
00774   char *myTarget_limit;
00775   int32_t targetCapacity = 0;
00776   UConverterFromUnicodeArgs args;
00777 
00778   if (U_FAILURE (*err))
00779     return 0;
00780 
00781   if ((converter == NULL) || (targetSize < 0))
00782     {
00783       *err = U_ILLEGAL_ARGUMENT_ERROR;
00784       return 0;
00785     }
00786 
00787   /*makes a local copy of the UConverter */
00788   myConverter = *converter;
00789 
00790 
00791   /*Removes all state info on the UConverter */
00792   ucnv_reset (&myConverter);
00793 
00794   /*if the source is empty we return immediately */
00795   if (sourceSize == -1) {
00796     mySourceLength = u_strlen (source);
00797   } 
00798   if (mySourceLength == 0)
00799     {
00800       /*for consistency we still need to
00801        *store 0 in the targetCapacity
00802        *if the user requires it
00803        */
00804       return 0;
00805     }
00806 
00807   mySource_limit = source + mySourceLength;
00808   myTarget_limit = target + targetSize;
00809 
00810   /* Pin the limit to U_MAX_PTR.  NULL check is for AS/400. */
00811   if((myTarget_limit < target) || (myTarget_limit == NULL)) {
00812     myTarget_limit = (char *)U_MAX_PTR;
00813   }
00814 
00815   args.converter = &myConverter;
00816   args.flush = TRUE;
00817   args.offsets = NULL;
00818   args.source = source;
00819   args.sourceLimit = mySource_limit;
00820   args.target = target;
00821   args.targetLimit = myTarget_limit;
00822   args.size = sizeof(args);
00823   if (targetSize > 0)
00824     {
00825       /*calls the specific conversion routines */
00826       args.converter->sharedData->impl->fromUnicode(&args, err); 
00827   
00828       targetCapacity = args.target - target;
00829     }
00830 
00831   /*Updates targetCapacity to contain the number of bytes written to target */
00832 
00833   /* If the output buffer is exhausted, we need to stop writing
00834    * to it but continue the conversion in order to store in targetSize
00835    * the number of bytes that was required*/
00836   if (*err == U_BUFFER_OVERFLOW_ERROR || targetSize == 0)
00837     {
00838       char target2[CHUNK_SIZE];
00839       const char *target2_limit = target2 + CHUNK_SIZE;
00840 
00841       /*We use a stack allocated buffer around which we loop
00842        *(in case the output is greater than CHUNK_SIZE)
00843        */
00844       do
00845         {
00846           *err = U_ZERO_ERROR;
00847           args.target = target2;
00848           args.targetLimit = target2_limit;
00849           args.converter->sharedData->impl->fromUnicode(&args, err); 
00850           /*updates the output parameter to contain the number of char required */
00851           targetCapacity += (args.target - target2);
00852         } while (*err == U_BUFFER_OVERFLOW_ERROR);
00853       /*We will set the error code to U_BUFFER_OVERFLOW_ERROR only if
00854        *nothing graver happened in the previous loop*/
00855       if (U_SUCCESS (*err))
00856         *err = U_BUFFER_OVERFLOW_ERROR;
00857     }
00858 
00859   return targetCapacity;
00860 }
00861 
00862 int32_t ucnv_toUChars (const UConverter * converter,
00863                        UChar * target,
00864                        int32_t targetSize,
00865                        const char *source,
00866                        int32_t sourceSize,
00867                        UErrorCode * err)
00868 {
00869   const char *mySource_limit = source + sourceSize;
00870   UConverter myConverter;
00871   UChar *myTarget_limit;
00872   int32_t targetCapacity;
00873   UConverterToUnicodeArgs args;
00874 
00875   if (U_FAILURE (*err))
00876     return 0;
00877 
00878   if ((converter == NULL) || (targetSize < 0) || (sourceSize < 0))
00879     {
00880       *err = U_ILLEGAL_ARGUMENT_ERROR;
00881       return 0;
00882     }
00883   /*Means there is no work to be done */
00884   if (sourceSize == 0)
00885     {
00886       /*for consistency we still need to
00887        *store 0 in the targetCapacity
00888        *if the user requires it
00889        */
00890       if (targetSize >= 1)
00891         {
00892           target[0] = 0x0000;
00893           return 1;
00894         }
00895       else
00896         return 0;
00897     }
00898 
00899   /*makes a local copy of the UConverter */
00900   myConverter = *converter;
00901 
00902   /*Removes all state info on the UConverter */
00903   ucnv_reset (&myConverter);
00904 
00905   args.converter = &myConverter;
00906   args.flush = TRUE;
00907   args.offsets = NULL;
00908   args.source = source;
00909   args.sourceLimit = mySource_limit;
00910   args.target = target;
00911   args.size = sizeof(args);
00912   if (targetSize > 0)
00913   {
00914       myTarget_limit = target + targetSize - 1;
00915 
00916       /* Pin the limit to U_MAX_PTR.  NULL check is for AS/400. */
00917       if ((myTarget_limit == NULL) || (myTarget_limit < target)) {
00918           myTarget_limit = ((UChar*)U_MAX_PTR) - 1; 
00919       }
00920 
00921       /*Not in pure pre-flight mode */
00922 
00923       args.targetLimit = myTarget_limit;
00924       args.converter->sharedData->impl->toUnicode(&args, err); 
00925 
00926       /*Null terminates the string */
00927       *(args.target) = 0x0000;
00928     }
00929 
00930 
00931   /*Rigs targetCapacity to have at least one cell for zero termination */
00932   /*Updates targetCapacity to contain the number of bytes written to target */
00933   targetCapacity = 1;
00934   targetCapacity += args.target - target;
00935 
00936   /* If the output buffer is exhausted, we need to stop writing
00937    * to it but if the input buffer is not exhausted,
00938    * we need to continue the conversion in order to store in targetSize
00939    * the number of bytes that was required
00940    */
00941   if (*err == U_BUFFER_OVERFLOW_ERROR || targetSize == 0)
00942     {
00943       UChar target2[CHUNK_SIZE];
00944       const UChar *target2_limit = target2 + CHUNK_SIZE;
00945 
00946       /*We use a stack allocated buffer around which we loop
00947          (in case the output is greater than CHUNK_SIZE) */
00948       do
00949         {
00950           *err = U_ZERO_ERROR;
00951           args.target = target2;
00952           args.targetLimit = target2_limit;
00953           args.converter->sharedData->impl->toUnicode(&args, err); 
00954           /*updates the output parameter to contain the number of char required */
00955           targetCapacity += args.target - target2;
00956         } while (*err == U_BUFFER_OVERFLOW_ERROR);
00957 
00958       if (U_SUCCESS (*err))
00959         *err = U_BUFFER_OVERFLOW_ERROR;
00960     }
00961 
00962   return targetCapacity;
00963 }
00964 
00965 UChar32 ucnv_getNextUChar(UConverter * converter,
00966                           const char **source,
00967                           const char *sourceLimit,
00968                           UErrorCode * err)
00969 {
00970   UConverterToUnicodeArgs args;
00971   UChar32 ch;
00972 
00973   if(err == NULL || U_FAILURE(*err)) {
00974       return 0xffff;
00975   }
00976 
00977   /* In case internal data had been stored
00978    * we return the first UChar32 in the internal buffer,
00979    * and update the internal state accordingly
00980    */
00981   if (converter->UCharErrorBufferLength > 0)
00982     {
00983       UTextOffset i = 0;
00984       UChar32 myUChar;
00985       UTF_NEXT_CHAR(converter->UCharErrorBuffer, i, sizeof(converter->UCharErrorBuffer), myUChar);
00986       /*In this memmove we update the internal buffer by
00987        *popping the first character.
00988          *Note that in the call itself we decrement
00989          *UCharErrorBufferLength
00990        */
00991       uprv_memmove (converter->UCharErrorBuffer,
00992                    converter->UCharErrorBuffer + i,
00993                    (converter->UCharErrorBufferLength - i) * sizeof (UChar));
00994       converter->UCharErrorBufferLength -= (int8_t)i;
00995       return myUChar;
00996     }
00997   /*calls the specific conversion routines */
00998   /*as dictated in a code review, avoids a switch statement */
00999   args.converter = converter;
01000   args.flush = TRUE;
01001   args.offsets = NULL;
01002   args.source = *source;
01003   args.sourceLimit = sourceLimit;
01004   args.target = NULL;
01005   args.targetLimit = NULL;
01006   args.size = sizeof(args);
01007   ch = converter->sharedData->impl->getNextUChar(&args, err);
01008   *source = args.source;
01009   return ch;
01010 }
01011 
01012 
01013 
01014 /**************************
01015 * Will convert a sequence of bytes from one codepage to another.
01016 * @param toConverterName: The name of the converter that will be used to encode the output buffer
01017 * @param fromConverterName: The name of the converter that will be used to decode the input buffer
01018 * @param target: Pointer to the output buffer* written
01019 * @param targetLength: on input contains the capacity of target, on output the number of bytes copied to target
01020 * @param source: Pointer to the input buffer
01021 * @param sourceLength: on input contains the capacity of source, on output the number of bytes processed in "source"
01022 * @param internal: used internally to store store state data across calls
01023 * @param err: fills in an error status
01024 */
01025 static void 
01026 T_UConverter_fromCodepageToCodepage (UConverter * outConverter,
01027                                      UConverter * inConverter,
01028                                      char **target,
01029                                      const char *targetLimit,
01030                                      const char **source,
01031                                      const char *sourceLimit,
01032                                      int32_t* offsets,
01033                                      UBool flush,
01034                                      UErrorCode * err)
01035 {
01036 
01037   UChar out_chunk[CHUNK_SIZE];
01038   const UChar *out_chunk_limit = out_chunk + CHUNK_SIZE;
01039   UChar *out_chunk_alias;
01040   UChar const *out_chunk_alias2;
01041 
01042 
01043   if (U_FAILURE (*err))    return;
01044 
01045 
01046   /*loops until the input buffer is completely consumed
01047    *or if an error has be encountered
01048    *first we convert from inConverter codepage to Unicode
01049    *then from Unicode to outConverter codepage
01050    */
01051   while ((*source != sourceLimit) && U_SUCCESS (*err))
01052     {
01053       out_chunk_alias = out_chunk;
01054       ucnv_toUnicode (inConverter,
01055                       &out_chunk_alias,
01056                       out_chunk_limit,
01057                       source,
01058                       sourceLimit,
01059                       NULL,
01060                       flush,
01061                       err);
01062 
01063       /*U_BUFFER_OVERFLOW_ERROR means that the output "CHUNK" is full
01064        *we will require at least another loop (it's a recoverable error)
01065        */
01066       if (U_SUCCESS (*err) || (*err == U_BUFFER_OVERFLOW_ERROR))
01067         {
01068           *err = U_ZERO_ERROR;
01069           out_chunk_alias2 = out_chunk;
01070 
01071           while ((out_chunk_alias2 != out_chunk_alias) && U_SUCCESS (*err))
01072             {
01073               ucnv_fromUnicode (outConverter,
01074                                 target,
01075                                 targetLimit,
01076                                 &out_chunk_alias2,
01077                                 out_chunk_alias,
01078                                 NULL,
01079                                 TRUE,
01080                                 err);
01081             }
01082         }
01083       else
01084         break;
01085     }
01086 
01087   return;
01088 }
01089 
01090 int32_t  ucnv_convert(const char *toConverterName,
01091                       const char *fromConverterName,
01092                       char *target,
01093                       int32_t targetSize,
01094                       const char *source,
01095                       int32_t sourceSize,
01096                       UErrorCode * err)
01097 {
01098   const char *mySource = source;
01099   const char *mySource_limit = source + sourceSize;
01100   UConverter *inConverter;
01101   UConverter *outConverter;
01102   char *myTarget = target;
01103   int32_t targetCapacity = 0;
01104 
01105   if (U_FAILURE (*err))
01106     return 0;
01107 
01108   if ((targetSize < 0) || (sourceSize < 0))
01109     {
01110       *err = U_ILLEGAL_ARGUMENT_ERROR;
01111       return 0;
01112     }
01113 
01114   /*if there is no input data, we're done */
01115   if (sourceSize == 0)
01116     {
01117       /*in case the caller passed an output ptr
01118        *we update it
01119        */
01120       return 0;
01121     }
01122 
01123   /*create the converters */
01124   inConverter = ucnv_open (fromConverterName, err);
01125   if (U_FAILURE (*err)) return 0;
01126   outConverter = ucnv_open (toConverterName, err);
01127   if (U_FAILURE (*err))
01128     {
01129       ucnv_close (inConverter);
01130       return 0;
01131     }
01132 
01133 
01134   if (targetSize > 0)
01135     {
01136       T_UConverter_fromCodepageToCodepage (outConverter,
01137                                            inConverter,
01138                                            &myTarget,
01139                                            target + targetSize,
01140                                            &mySource,
01141                                            mySource_limit,
01142                                            NULL,
01143                                            TRUE,
01144                                            err);
01145       /*Updates targetCapacity to contain the number of bytes written to target */
01146       targetCapacity = myTarget - target;
01147     }
01148 
01149   /* If the output buffer is exhausted (or we are "pre-flighting"), we need to stop writing
01150    * to it but continue the conversion in order to store in targetSize
01151    * the number of bytes that was required*/
01152   if (*err == U_BUFFER_OVERFLOW_ERROR || targetSize == 0)
01153     {
01154       char target2[CHUNK_SIZE];
01155       char *target2_alias = target2;
01156       const char *target2_limit = target2 + CHUNK_SIZE;
01157 
01158       /*We use a stack allocated buffer around which we loop
01159        *(in case the output is greater than CHUNK_SIZE)
01160        */
01161 
01162       do
01163         {
01164           *err = U_ZERO_ERROR;
01165           target2_alias = target2;
01166           T_UConverter_fromCodepageToCodepage (outConverter,
01167                                                inConverter,
01168                                                &target2_alias,
01169                                                target2_limit,
01170                                                &mySource,
01171                                                mySource_limit,
01172                                                NULL,
01173                                                TRUE,
01174                                                err);
01175 
01176           /*updates the output parameter to contain the number of char required */
01177           targetCapacity += (target2_alias - target2);
01178     } while (*err == U_BUFFER_OVERFLOW_ERROR);
01179 
01180       /*We will set the error code to U_BUFFER_OVERFLOW_ERROR only if
01181        *nothing graver happened in the previous loop*/
01182       if (U_SUCCESS (*err))
01183         *err = U_BUFFER_OVERFLOW_ERROR;
01184     }
01185 
01186   ucnv_close (inConverter);
01187   ucnv_close (outConverter);
01188 
01189   return targetCapacity;
01190 }
01191 
01192 UConverterType ucnv_getType(const UConverter* converter)
01193 {
01194   return (UConverterType)converter->sharedData->staticData->conversionType;
01195 }
01196 
01197 void ucnv_getStarters(const UConverter* converter, 
01198                       UBool starters[256],
01199                       UErrorCode* err)
01200 {
01201     if (err == NULL || U_FAILURE(*err)) {
01202         return;
01203     }
01204 
01205     if(converter->sharedData->impl->getStarters != NULL) {
01206         converter->sharedData->impl->getStarters(converter, starters, err);
01207     } else {
01208         *err = U_ILLEGAL_ARGUMENT_ERROR;
01209     }
01210 }
01211 
01212 static int32_t ucnv_getAmbiguousCCSID(const UConverter *cnv)
01213 {
01214     UErrorCode status = U_ZERO_ERROR;
01215     int32_t i = 0;
01216     int32_t ccsid = 0;
01217     if (cnv == NULL) 
01218     {
01219         return -1;
01220     }
01221     ccsid = ucnv_getCCSID(cnv, &status);
01222     if (U_FAILURE(status)) 
01223     {
01224         return -1;
01225     }
01226     for (i = 0; i < UCNV_MAX_AMBIGUOUSCCSIDS; i++) {
01227         if (ccsid == UCNV_AMBIGUOUSCONVERTERS[i].ccsid) 
01228         {
01229             return i;
01230         }
01231     }
01232     return -1;
01233 }
01234 
01235 void ucnv_fixFileSeparator(const UConverter *cnv, 
01236                            UChar* source, 
01237                            int32_t sourceLength)
01238 {
01239     int32_t i = 0;
01240     int32_t offset = 0;
01241     if ((source == NULL) || (cnv == NULL))
01242     {
01243         return;
01244     }
01245     if ((offset = ucnv_getAmbiguousCCSID(cnv)) != -1)
01246     {
01247         for (i = 0; i < sourceLength; i++) 
01248         {
01249             if (source[i] == UCNV_AMBIGUOUSCONVERTERS[offset].mismapped)
01250             {
01251                 source[i] = UCNV_AMBIGUOUSCONVERTERS[offset].replacement;
01252             }
01253         }
01254     }
01255 }
01256 
01257 UBool ucnv_isAmbiguous(const UConverter *cnv)
01258 {
01259     return (UBool)(ucnv_getAmbiguousCCSID(cnv) == -1 ? FALSE : TRUE);
01260 }
01261 
01262 void ucnv_setFallback(UConverter *cnv, UBool usesFallback)
01263 {
01264     cnv->useFallback = usesFallback;
01265 }
01266 
01267 UBool ucnv_usesFallback(const UConverter *cnv)
01268 {
01269     return cnv->useFallback;
01270 }
01271 
01272 void 
01273 ucnv_getInvalidChars (const UConverter * converter,
01274                       char *errBytes,
01275                       int8_t * len,
01276                       UErrorCode * err)
01277 {
01278     if (err == NULL || U_FAILURE(*err))
01279     {
01280         return;
01281     }
01282     if (len == NULL || errBytes == NULL || converter == NULL)
01283     {
01284         *err = U_ILLEGAL_ARGUMENT_ERROR;
01285         return;
01286     }
01287     if (*len < converter->invalidCharLength)
01288     {
01289         *err = U_INDEX_OUTOFBOUNDS_ERROR;
01290         return;
01291     }
01292     if ((*len = converter->invalidCharLength) > 0)
01293     {
01294         uprv_memcpy (errBytes, converter->invalidCharBuffer, *len);
01295     }
01296 }
01297 
01298 void 
01299 ucnv_getInvalidUChars (const UConverter * converter,
01300                        UChar *errChars,
01301                        int8_t * len,
01302                        UErrorCode * err)
01303 {
01304     if (err == NULL || U_FAILURE(*err))
01305     {
01306         return;
01307     }
01308     if (len == NULL || errChars == NULL || converter == NULL)
01309     {
01310         *err = U_ILLEGAL_ARGUMENT_ERROR;
01311         return;
01312     }
01313     if (*len < converter->invalidUCharLength)
01314     {
01315         *err = U_INDEX_OUTOFBOUNDS_ERROR;
01316         return;
01317     }
01318     if ((*len = converter->invalidUCharLength) > 0)
01319     {
01320         uprv_memcpy (errChars, converter->invalidUCharBuffer, sizeof(UChar) * (*len));
01321     }
01322 }
01323 
01324 /*
01325  * Hey, Emacs, please set the following:
01326  *
01327  * Local Variables:
01328  * indent-tabs-mode: nil
01329  * End:
01330  *
01331  */
01332 

Generated at Tue Dec 5 10:47:57 2000 for ICU by doxygen 1.2.3, written by Dimitri van Heesch, © 1997-2000