Main Page   Class Hierarchy   Alphabetical List   Compound List   File List   Compound Members  

ucnvsbcs.c

00001 /*  
00002 **********************************************************************
00003 *   Copyright (C) 2000, International Business Machines
00004 *   Corporation and others.  All Rights Reserved.
00005 **********************************************************************
00006 *   file name:  ucnvsbcs.c
00007 *   encoding:   US-ASCII
00008 *   tab size:   8 (not used)
00009 *   indentation:4
00010 *
00011 *   created on: 2000feb03
00012 *   created by: Markus W. Scherer
00013 *
00014 *   Change history:
00015 *
00016 *   05/09/00    helena      Added implementation to handle fallback mappings.
00017 *   06/20/2000  helena      OS/400 port changes; mostly typecast.
00018 *   06/29/2000  helena      Major rewrite of the callback APIs.
00019 */
00020 
00021 #include "unicode/utypes.h"
00022 #include "cmemory.h"
00023 #include "ucmp16.h"
00024 #include "ucmp8.h"
00025 #include "unicode/ucnv_err.h"
00026 #include "ucnv_bld.h"
00027 #include "unicode/ucnv.h"
00028 #include "ucnv_cnv.h"
00029 
00030 /* SBCS --------------------------------------------------------------------- */
00031 
/*
 * _SBCSLoad: attach the SBCS conversion tables of sharedData to the raw
 * converter data that starts at raw.
 *
 * The data is consumed in this order:
 *   1. 256 uint16_t entries, used in place (not copied) as sbcs.toUnicode;
 *   2. the sbcs.fromUnicode compact byte array, read by ucmp8_initFromData(),
 *      which advances raw through the pointer it is given;
 *   3. only if staticData->hasFromUnicodeFallback is TRUE: padding, then the
 *      sbcs.fromUnicodeFallback compact byte array;
 *   4. only if staticData->hasToUnicodeFallback is TRUE: padding, then the
 *      sbcs.toUnicodeFallback table, again used in place.
 * Padding rounds the offset, measured from the end of the toUnicode table,
 * up to the next multiple of 4.
 *
 * pErrorCode is not inspected here; it is only passed on to
 * ucmp8_initFromData().
 */
00032 static void
00033 _SBCSLoad(UConverterSharedData *sharedData, const uint8_t *raw, UErrorCode *pErrorCode) {
00034     const uint8_t *base;
00035 
00036     sharedData->table->sbcs.toUnicode = (UChar *)raw;
00037     raw += 256*sizeof(uint16_t);
00038     base = raw;     /* all padding below is computed relative to this point */
00039     ucmp8_initFromData(&sharedData->table->sbcs.fromUnicode, &raw, pErrorCode);
00040 
00041     if (sharedData->staticData->hasFromUnicodeFallback == TRUE) {
00042         raw += (4-((raw-base)&3))&3;    /* pad to 4 (adds 0 when already aligned) */
00043         ucmp8_initFromData(&sharedData->table->sbcs.fromUnicodeFallback, &raw, pErrorCode);
00044     }
00045 
00046     if (sharedData->staticData->hasToUnicodeFallback == TRUE) {
00047         raw += (4-((raw-base)&3))&3;    /* pad to 4 (adds 0 when already aligned) */
00048         sharedData->table->sbcs.toUnicodeFallback = (UChar *)raw;
00049     }
00050 }
00053 
/*
 * _SBCSUnload: close the compact byte arrays of an SBCS converter.
 * sbcs.fromUnicode is always closed; sbcs.fromUnicodeFallback is closed only
 * when staticData->hasFromUnicodeFallback is TRUE. The toUnicode tables are
 * not touched here.
 */
00054 static void
00055 _SBCSUnload(UConverterSharedData *sharedData) {
00056     CompactByteArray *fallback = &sharedData->table->sbcs.fromUnicodeFallback;
00057     ucmp8_close(&sharedData->table->sbcs.fromUnicode);
00058     if (sharedData->staticData->hasFromUnicodeFallback == TRUE) { ucmp8_close(fallback); }
00059 }
00060 
/*
 * T_UConverter_toUnicode_SBCS: convert single-byte input to UChars.
 *
 * Reads bytes from args->source (up to args->sourceLimit) and writes UChars
 * to args->target (up to args->targetLimit). Each byte indexes the
 * converter's sbcs.toUnicode table. Table values below 0xfffe are written to
 * the target as they are. For 0xfffe/0xffff the sbcs.toUnicodeFallback table
 * is tried when UCNV_TO_U_USE_FALLBACK(args->converter) holds and the static
 * data has hasToUnicodeFallback == TRUE. If there is still no mapping, the
 * byte is stored in converter->invalidCharBuffer and ToU_CALLBACK_MACRO is
 * invoked with UCNV_UNASSIGNED / U_INVALID_CHAR_FOUND (0xfffe) or
 * UCNV_ILLEGAL / U_ILLEGAL_CHAR_FOUND (0xffff).
 *
 * The loop stops when the source is exhausted, when the target is full
 * (*err = U_BUFFER_OVERFLOW_ERROR), or when *err is a failure after the
 * callback. On return args->source and args->target are advanced by the
 * local source/target indices.
 *
 * NOTE(review): ToU_CALLBACK_MACRO is defined elsewhere; what it does with
 * args and the local indices cannot be seen from this file.
 */
00061 U_CFUNC void T_UConverter_toUnicode_SBCS (UConverterToUnicodeArgs * args,
00062                                   UErrorCode * err)
00063 {
00064   char *mySource = (char *) args->source;
00065   UChar *myTarget = args->target;
00066   int32_t mySourceIndex = 0;
00067   int32_t myTargetIndex = 0;
00068   int32_t targetLength = args->targetLimit - myTarget;
00069   int32_t sourceLength = args->sourceLimit - (char *) mySource;
00070   UChar *myToUnicode = NULL, *myToUnicodeFallback = NULL;
00071   UChar targetUniChar = 0x0000;
00072   
00073   myToUnicode = args->converter->sharedData->table->sbcs.toUnicode;
00074   myToUnicodeFallback = args->converter->sharedData->table->sbcs.toUnicodeFallback;
00075   while (mySourceIndex < sourceLength)
00076     {
00077 
00078       /*writing the UniChar to the output stream */
00079       if (myTargetIndex < targetLength)
00080         {
00081           /*gets the corresponding UniChar */
00082           targetUniChar = myToUnicode[(unsigned char) mySource[mySourceIndex++]];
00083 
00084           if (targetUniChar < 0xfffe)
00085             {
00086               /* writes the UniChar to the output stream */
00087               myTarget[myTargetIndex++] = targetUniChar;
00088             }
00089           else
00090             {
00091               if (UCNV_TO_U_USE_FALLBACK(args->converter) &&
00092                   (args->converter->sharedData->staticData->hasToUnicodeFallback == TRUE))
00093               {
00094                   /* Look up in the fallback table first */
00095                   UChar fallbackUniChar = myToUnicodeFallback[(unsigned char) mySource[mySourceIndex-1]];
00096                   if (fallbackUniChar < 0xfffe)
00097                   {
                          /* also overwrites targetUniChar so the error branch below is skipped */
00098                       myTarget[myTargetIndex++] = targetUniChar = fallbackUniChar;
00099                   }
00100               }
00101               if (targetUniChar >= 0xfffe)
00102               {
                      /* args fields are saved here and restored after the callback */
00103                   const char *saveSource = args->source;
00104                   UChar *saveTarget = args->target;
00105                   int32_t *saveOffsets = args->offsets;
00106                   UConverterCallbackReason reason;
00107 
00108                   if (targetUniChar == 0xfffe)
00109                   {
00110                     reason = UCNV_UNASSIGNED;
00111                     *err = U_INVALID_CHAR_FOUND;
00112                   }
00113                   else
00114                   {
00115                     reason = UCNV_ILLEGAL;
00116                     *err = U_ILLEGAL_CHAR_FOUND;
00117                   }
00118 
00119                   args->converter->invalidCharBuffer[0] = (char) mySource[mySourceIndex - 1];
00120                   args->converter->invalidCharLength = 1;
00121 
                      /* let the callback see the current read/write positions */
00122                   args->target = myTarget + myTargetIndex;
00123                   args->source = mySource + mySourceIndex;
00124 
00125                   /* to do hsys: add more smarts to the codeUnits and length later */
00126                   ToU_CALLBACK_MACRO(args->converter->toUContext,
00127                                      args,
00128                                      args->converter->invalidCharBuffer,
00129                                      args->converter->invalidCharLength, 
00130                                      reason,
00131                                      err);
00132                   /* Hsys: calculate the source and target advancement */
00133                   args->source = saveSource;
00134                   args->target = saveTarget;
00135                   args->offsets = saveOffsets;
00136                   if (U_FAILURE (*err)) break;
00137                   args->converter->invalidCharLength = 0;
00138               }
00139             }
00140         }
00141       else
00142         {
              /* source bytes remain but the target is full */
00143           *err = U_BUFFER_OVERFLOW_ERROR;
00144           break;
00145         }
00146     }
00147   
      /* report progress back to the caller */
00148   args->target += myTargetIndex;
00149   args->source += mySourceIndex;
00150 
00151   return;
00152 }
00153 
/*
 * T_UConverter_fromUnicode_SBCS: convert UChars to single-byte output.
 *
 * Reads UChars from args->source (up to args->sourceLimit) and writes bytes
 * to args->target (up to args->targetLimit). Each UChar is looked up with
 * ucmp8_getu() in sbcs.fromUnicode. A result byte of 0 counts as "no
 * mapping" unless the source UChar itself is 0. When there is no mapping,
 * sbcs.fromUnicodeFallback is tried if UCNV_FROM_U_USE_FALLBACK() holds for
 * the UChar and the static data has hasFromUnicodeFallback == TRUE.
 *
 * If the UChar is still unmapped it is stored in
 * converter->invalidUCharBuffer and *err is set to U_INVALID_CHAR_FOUND.
 * A lead surrogate gets extra handling:
 *   - followed by a trail surrogate: both are buffered and consumed;
 *   - followed by something else: the reason becomes UCNV_ILLEGAL;
 *   - last UChar of the input with args->flush TRUE: UCNV_ILLEGAL and
 *     U_TRUNCATED_CHAR_FOUND;
 *   - last UChar of the input without flush: stored in
 *     converter->fromUSurrogateLead.
 * FromU_CALLBACK_MACRO is invoked only while converter->fromUSurrogateLead
 * is 0.
 *
 * The loop stops when the source is exhausted, when the target is full
 * (*err = U_BUFFER_OVERFLOW_ERROR), or when *err is a failure after the
 * callback. On return args->source and args->target are advanced by the
 * local indices.
 *
 * NOTE(review): when a lead surrogate is stashed in fromUSurrogateLead,
 * *err is left at U_INVALID_CHAR_FOUND, and nothing in this function reads
 * or clears fromUSurrogateLead again (the code that would do so is commented
 * out below) - confirm this is handled by the caller.
 */
00154 U_CFUNC void T_UConverter_fromUnicode_SBCS (UConverterFromUnicodeArgs * args,
00155                                  UErrorCode * err)
00156 {
00157   const UChar *mySource = args->source;
00158   unsigned char *myTarget = (unsigned char *) args->target;
00159   int32_t mySourceIndex = 0;
00160   int32_t myTargetIndex = 0;
00161   int32_t targetLength = args->targetLimit - (char *) myTarget;
00162   int32_t sourceLength = args->sourceLimit - mySource;
00163   CompactByteArray *myFromUnicode = NULL, *myFromUnicodeFallback = NULL;
00164   unsigned char targetChar = 0x00;
00165   UConverterCallbackReason reason;
00166 
00167   myFromUnicode = &args->converter->sharedData->table->sbcs.fromUnicode;
00168   myFromUnicodeFallback = &args->converter->sharedData->table->sbcs.fromUnicodeFallback;
00169   /*writing the char to the output stream */
00170   /* HSYS : to do : finish the combining of the surrogate characters later */
00171   /*
00172   if (args->converter->fromUSurrogateLead != 0 && UTF_IS_TRAIL(mySource[mySourceIndex]))
00173   {
00174   }
00175   */
00176   while (mySourceIndex < sourceLength)
00177     {
00178       targetChar = ucmp8_getu (myFromUnicode, mySource[mySourceIndex]);
00179 
00180       if (myTargetIndex < targetLength)
00181         {
00182           mySourceIndex++;
              /* a 0 byte is a real mapping only for U+0000 */
00183           if (targetChar != 0 || !mySource[mySourceIndex - 1])
00184             {
00185               /*writes the char to the output stream */
00186               myTarget[myTargetIndex++] = targetChar;
00187             }
00188           else if (UCNV_FROM_U_USE_FALLBACK(args->converter, mySource[mySourceIndex-1]) &&
00189                   (args->converter->sharedData->staticData->hasFromUnicodeFallback == TRUE))
00190           {
00191               /* Look up in the fallback table first */
00192               targetChar = ucmp8_getu (myFromUnicodeFallback, mySource[mySourceIndex-1]);
00193               if (targetChar != 0 || !mySource[mySourceIndex - 1])
00194                 {
00195                   /*writes the char to the output stream */
00196                   myTarget[myTargetIndex++] = targetChar;
00197                 }
00198           }
              /* still unmapped after the optional fallback lookup */
00199           if (targetChar == 0 && mySource[mySourceIndex-1] != 0)
00200           {
00201               *err = U_INVALID_CHAR_FOUND;
00202               reason = UCNV_UNASSIGNED;
00203               
00204               args->converter->invalidUCharBuffer[0] = (UChar)mySource[mySourceIndex - 1];
00205               args->converter->invalidUCharLength = 1;
00206               if (UTF_IS_LEAD(mySource[mySourceIndex-1]))
00207               {
00208                   /*if (mySource < args->sourceLimit)*/
00209                   if(mySourceIndex < sourceLength)
00210                   {
00211                       if (UTF_IS_TRAIL(mySource[mySourceIndex]))
00212                       {
                              /* complete pair: buffer and consume the trail surrogate too */
00213                           args->converter->invalidUCharBuffer[1] = (UChar)mySource[mySourceIndex];
00214                           args->converter->invalidUCharLength++;
00215                           mySourceIndex++;
00216                       }
00217                       else 
00218                       {
00219                           reason = UCNV_ILLEGAL;
00220                       }                          
00221                   }
00222                   else if (args->flush == TRUE)
00223                   {
00224                       reason = UCNV_ILLEGAL;
00225                       *err = U_TRUNCATED_CHAR_FOUND;
00226                   } 
00227                   else 
00228                   {
00229                       args->converter->fromUSurrogateLead = args->converter->invalidUCharBuffer[0];
00230                       /* do not call the callback */
00231                   }
00232               }
00233               if (args->converter->fromUSurrogateLead == 0) 
00234               {
                      /* args fields are saved here and restored after the callback */
00235                   const UChar *saveSource = args->source;
00236                   char *saveTarget = args->target;
00237                   int32_t *saveOffsets = args->offsets;
00238                   args->target = (char *)myTarget+myTargetIndex;
00239                   args->source = mySource+mySourceIndex;
00240                   /* Needed explicit cast for myTarget on MVS to make compiler happy - JJD */
00241                   /* Check if we have encountered a surrogate pair.  If first UChar is lead byte
00242                    and second UChar is trail byte, it's a surrogate char.  If UChar is lead byte 
00243                    but second UChar is not trail byte, it's illegal sequence.  If neither, it's
00244                    plain unassigned code point.*/
00245                    FromU_CALLBACK_MACRO(args->converter->fromUContext,
00246                                          args,
00247                                          args->converter->invalidUCharBuffer,
00248                                          args->converter->invalidUCharLength,
00249                                          (UChar32) (args->converter->invalidUCharLength == 2 ? 
00250                                              UTF16_GET_PAIR_VALUE(args->converter->invalidUCharBuffer[0], 
00251                                                                   args->converter->invalidUCharBuffer[1]) 
00252                                                     : args->converter->invalidUCharBuffer[0]),
00253                                          reason,
00254                                          err);
00255                   args->source = saveSource;
00256                   args->target = saveTarget;
00257                   args->offsets = saveOffsets;
00258                   if (U_FAILURE (*err))
00259                     {
00260                       break;
00261                     }
00262                   args->converter->invalidUCharLength = 0;
00263               }
00264           }               
00265         }
00266       else
00267         {
              /* source UChars remain but the target is full */
00268           *err = U_BUFFER_OVERFLOW_ERROR;
00269           break;
00270         }
00271 
00272     }
00273 
      /* report progress back to the caller */
00274   args->target += myTargetIndex;
00275   args->source += mySourceIndex;
00276 
00277 
00278   return;
00279 }
00280 
/*
 * T_UConverter_getNextUChar_SBCS: decode the next single byte of input.
 *
 * Returns 0xffff immediately if *err already indicates failure, or (with
 * *err = U_INDEX_OUTOFBOUNDS_ERROR) if no byte is left before
 * args->sourceLimit. Otherwise consumes one byte from args->source and looks
 * it up in sbcs.toUnicode; a value below 0xfffe is returned directly.
 *
 * For 0xfffe/0xffff the sbcs.toUnicodeFallback table is tried when
 * UCNV_TO_U_USE_FALLBACK(args->converter) holds and the static data has
 * hasToUnicodeFallback == TRUE. If there is still no mapping, *err is set to
 * U_INVALID_CHAR_FOUND (0xfffe) or U_ILLEGAL_CHAR_FOUND (0xffff) and the
 * converter's fromCharErrorBehaviour callback is called with args->target
 * pointing at the local result variable, so the callback can supply a single
 * replacement UChar. A U_BUFFER_OVERFLOW_ERROR left by the callback is reset
 * to U_ZERO_ERROR, and the local result variable is returned.
 */
00281 U_CFUNC UChar32 T_UConverter_getNextUChar_SBCS(UConverterToUnicodeArgs* args,
00282                                                UErrorCode* err)
00283 {
00284   UChar myUChar;
00285   
00286   if (U_FAILURE(*err)) return 0xffff;
00287 
00288   if (args->source+1 > args->sourceLimit) 
00289     {
00290       *err = U_INDEX_OUTOFBOUNDS_ERROR;
00291       return 0xffff;
00292     }
00293   
00294   /*Gets the corresponding codepoint*/
00295   myUChar = args->converter->sharedData->table->sbcs.toUnicode[(unsigned char)*(args->source++)];
00296   
00297   if (myUChar < 0xfffe) return myUChar;
00298   else
00299     {      
00300       UChar* myUCharPtr = &myUChar;
00301       UConverterCallbackReason reason;
00302 
00303       /* Do the fallback stuff */
00304       if (UCNV_TO_U_USE_FALLBACK(args->converter) &&
00305           (args->converter->sharedData->staticData->hasToUnicodeFallback == TRUE))
00306       {
              /* args->source was already advanced, so the byte just read is at source-1 */
00307           UChar fallbackUChar = args->converter->sharedData->table->sbcs.toUnicodeFallback[ (unsigned char)*(args->source-1)];
00308           if (fallbackUChar < 0xfffe) return fallbackUChar;
00309       }
00310 
00311       if (myUChar == 0xfffe)
00312       {
00313         reason = UCNV_UNASSIGNED;
00314         *err = U_INVALID_CHAR_FOUND;
00315       }
00316       else
00317       {
00318         reason = UCNV_ILLEGAL;
00319         *err = U_ILLEGAL_CHAR_FOUND;
00320       }
00321 
00322       /*Calls the ErrorFunctor */
00323       /*It is very likely that the ErrorFunctor will write to the
00324        *internal buffers */
          /* give the callback room for exactly one UChar: the local result */
00325       args->target = myUCharPtr;
00326       args->targetLimit = myUCharPtr + 1;
00327       args->converter->fromCharErrorBehaviour(args->converter->toUContext,
00328                                     args,
00329                                     args->source - 1,
00330                                     1,
00331                                     reason,
00332                                     err);
00333 
00334       /*makes the internal caching transparent to the user*/
00335       if (*err == U_BUFFER_OVERFLOW_ERROR) *err = U_ZERO_ERROR;
00336       
00337       return myUChar;
00338     }
00339 }
00340 
/*
 * Implementation table for the SBCS converter type (UCNV_SBCS).
 * This is a positional initializer: the meaning of each slot comes from the
 * UConverterImpl declaration, which is not in this file. The entries that
 * are set are the load/unload functions and the toUnicode, fromUnicode and
 * getNextUChar conversion functions defined above; every other slot is NULL.
 */
00341 static const UConverterImpl _SBCSImpl={
00342     UCNV_SBCS,
00343 
00344     _SBCSLoad,
00345     _SBCSUnload,
00346 
00347     NULL,
00348     NULL,
00349     NULL,
00350 
00351     T_UConverter_toUnicode_SBCS,
00352     NULL,
00353     T_UConverter_fromUnicode_SBCS,
00354     NULL,
00355     T_UConverter_getNextUChar_SBCS,
00356 
00357     NULL,
00358     NULL
00359 };
00360 
00361 
00362 /* Static data is in tools/makeconv/ucnvstat.c for data-based
00363  * converters. Be sure to update it as well.
00364  */
00365 
/*
 * Shared-data record for the SBCS converter type; it refers to _SBCSImpl.
 * Positional initializer: the slot meanings come from the
 * UConverterSharedData declaration, which is not in this file.
 * NOTE(review): the first slot looks like a structure-size field and the
 * trailing 0 like the initial toUnicode status (the DBCS record labels it
 * that way) - confirm against the declaration.
 */
00366 const UConverterSharedData _SBCSData={
00367     sizeof(UConverterSharedData), 1,
00368     NULL, NULL, NULL, FALSE, &_SBCSImpl, 
00369     0
00370 };
00371 
00372 /* DBCS --------------------------------------------------------------------- */
00373 
/*
 * _DBCSLoad: initialize the DBCS compact short arrays of sharedData from the
 * raw converter data that starts at raw.
 *
 * Every table is read with ucmp16_initFromData(), which advances raw through
 * the pointer it is given. The order is:
 *   1. dbcs.toUnicode, followed by padding to a multiple of 4 measured from
 *      the start of the data;
 *   2. dbcs.fromUnicode;
 *   3. only if staticData->hasFromUnicodeFallback is TRUE: padding measured
 *      from the start of fromUnicode, then dbcs.fromUnicodeFallback;
 *   4. only if staticData->hasToUnicodeFallback is TRUE: padding, then
 *      dbcs.toUnicodeFallback.
 *
 * NOTE(review): after step 3 the padding base is reset to the current
 * position, so the padding computed in step 4 is always 0 in that case;
 * without step 3 it is measured from the start of fromUnicode. This mirrors
 * the original code exactly - confirm it matches the data writer.
 *
 * pErrorCode is not inspected here; it is only passed on to
 * ucmp16_initFromData().
 */
00374 U_CFUNC void
00375 _DBCSLoad(UConverterSharedData *sharedData, const uint8_t *raw, UErrorCode *pErrorCode) {
00376     const uint8_t *base = raw;
00377 
00378     ucmp16_initFromData(&sharedData->table->dbcs.toUnicode, &raw, pErrorCode);
00379     raw += (4-((raw-base)&3))&3;        /* pad to 4 (adds 0 when already aligned) */
00380     base = raw;
00381 
00382     ucmp16_initFromData(&sharedData->table->dbcs.fromUnicode, &raw, pErrorCode);
00383 
00384     if (sharedData->staticData->hasFromUnicodeFallback == TRUE) {
00385         raw += (4-((raw-base)&3))&3;    /* pad to 4 (adds 0 when already aligned) */
00386         ucmp16_initFromData(&sharedData->table->dbcs.fromUnicodeFallback, &raw, pErrorCode);
00387         base = raw;
00388     }
00389 
00390     if (sharedData->staticData->hasToUnicodeFallback == TRUE) {
00391         raw += (4-((raw-base)&3))&3;    /* pad to 4 (adds 0 when already aligned) */
00392         ucmp16_initFromData(&sharedData->table->dbcs.toUnicodeFallback, &raw, pErrorCode);
00393     }
00394 }
00399 
/*
 * _DBCSUnload: close the compact short arrays of a DBCS converter.
 * dbcs.fromUnicode and dbcs.toUnicode are always closed, in that order; each
 * fallback array is closed only when the matching flag in staticData
 * (hasFromUnicodeFallback / hasToUnicodeFallback) is TRUE.
 */
00400 U_CFUNC void
00401 _DBCSUnload(UConverterSharedData *sharedData) {
00402     CompactShortArray *fromUFallback = &sharedData->table->dbcs.fromUnicodeFallback;
00403     CompactShortArray *toUFallback = &sharedData->table->dbcs.toUnicodeFallback;
00404     ucmp16_close(&sharedData->table->dbcs.fromUnicode);
00405     ucmp16_close(&sharedData->table->dbcs.toUnicode);
00406     if (sharedData->staticData->hasFromUnicodeFallback == TRUE) { ucmp16_close(fromUFallback); }
00407     if (sharedData->staticData->hasToUnicodeFallback == TRUE) { ucmp16_close(toUFallback); }
00408 }
00409 
/*
 * T_UConverter_toUnicode_DBCS: convert double-byte input to UChars.
 *
 * Reads bytes from args->source (up to args->sourceLimit) and writes UChars
 * to args->target (up to args->targetLimit). Bytes are taken one at a time:
 * when converter->toUnicodeStatus is 0 the byte is stored there as a pending
 * lead byte; otherwise it is combined with the pending lead byte into a
 * 16-bit value, the status is cleared, and the value is looked up with
 * ucmp16_getu() in dbcs.toUnicode. Because the status survives across
 * calls, a pair may be split between two calls.
 *
 * Results below 0xfffe are written to the target. For 0xfffe/0xffff the
 * dbcs.toUnicodeFallback array is tried when
 * UCNV_TO_U_USE_FALLBACK(args->converter) holds and the static data has
 * hasToUnicodeFallback == TRUE. If there is still no mapping, both bytes are
 * stored in converter->invalidCharBuffer and ToU_CALLBACK_MACRO is invoked
 * with UCNV_UNASSIGNED / U_INVALID_CHAR_FOUND (0xfffe) or
 * UCNV_ILLEGAL / U_ILLEGAL_CHAR_FOUND (0xffff).
 *
 * The loop stops when the source is exhausted, when the target is full
 * (*err = U_BUFFER_OVERFLOW_ERROR; this is checked before every byte, lead
 * bytes included), or when *err is a failure after the callback. If
 * args->flush is TRUE, all input was consumed, a lead byte is still pending
 * and *err is not a failure, *err becomes U_TRUNCATED_CHAR_FOUND and the
 * pending byte is dropped. On return args->source and args->target are
 * advanced by the local indices.
 *
 * NOTE(review): a lead byte of 0x00 cannot be told apart from "no pending
 * byte", because toUnicodeStatus == 0 encodes both.
 * NOTE(review): ToU_CALLBACK_MACRO is defined elsewhere; what it does with
 * args and the local indices cannot be seen from this file.
 */
00410 U_CFUNC void   T_UConverter_toUnicode_DBCS (UConverterToUnicodeArgs * args,
00411                                     UErrorCode * err)
00412 {
00413   const char *mySource = ( char *) args->source;
00414   UChar *myTarget = args->target;
00415   int32_t mySourceIndex = 0;
00416   int32_t myTargetIndex = 0;
00417   int32_t targetLength = args->targetLimit - myTarget;
00418   int32_t sourceLength = args->sourceLimit - (char *) mySource;
00419   CompactShortArray *myToUnicode = NULL, *myToUnicodeFallback = NULL;
00420   UChar targetUniChar = 0x0000;
00421   UChar mySourceChar = 0x0000;
00422 
00423   myToUnicode = &args->converter->sharedData->table->dbcs.toUnicode;
00424   myToUnicodeFallback = &args->converter->sharedData->table->dbcs.toUnicodeFallback;
00425 
00426   while (mySourceIndex < sourceLength)
00427     {
00428       if (myTargetIndex < targetLength)
00429         {
00430           /*gets the corresponding UniChar */
00431           mySourceChar = (unsigned char) mySource[mySourceIndex++];
00432 
00433           /*We have no internal state, so this byte is kept as the pending lead byte */
00434           if (args->converter->toUnicodeStatus == 0x00)
00435             {
00436               args->converter->toUnicodeStatus = (unsigned char) mySourceChar;
00437             }
00438           else
00439             {
                  /* always true in this branch: combine pending lead byte and this trail byte */
00440               if (args->converter->toUnicodeStatus != 0x00)
00441                 {
00442                   mySourceChar = (UChar) ((args->converter->toUnicodeStatus << 8) | (mySourceChar & 0x00FF));
00443                   args->converter->toUnicodeStatus = 0x00;
00444                 }
00445 
00446               targetUniChar = (UChar) ucmp16_getu (myToUnicode, mySourceChar);
00447 
00448               /*writing the UniChar to the output stream */
00449               if (targetUniChar < 0xfffe)
00450                 {
00451                   /*writes the UniChar to the output stream */
00452                   myTarget[myTargetIndex++] = targetUniChar;
00453                 }
00454               else if (UCNV_TO_U_USE_FALLBACK(args->converter) &&
00455                   (args->converter->sharedData->staticData->hasToUnicodeFallback == TRUE))
00456               {
00457                   UChar fallbackUniChar = (UChar) ucmp16_getu(myToUnicodeFallback, mySourceChar);
00458                   if (fallbackUniChar < 0xfffe)
00459                   {
                          /* also overwrites targetUniChar so the error branch below is skipped */
00460                       myTarget[myTargetIndex++] = targetUniChar = fallbackUniChar;
00461                   }
00462               }
00463               if (targetUniChar >= 0xfffe)
00464                 {
                      /* args fields are saved here and restored after the callback */
00465                   const char *saveSource = args->source;
00466                   UChar *saveTarget = args->target;
00467                   int32_t *saveOffsets = args->offsets;
00468                   UConverterCallbackReason reason;
00469 
00470                   if (targetUniChar == 0xfffe)
00471                   {
00472                     reason = UCNV_UNASSIGNED;
00473                     *err = U_INVALID_CHAR_FOUND;
00474                   }
00475                   else
00476                   {
00477                     reason = UCNV_ILLEGAL;
00478                     *err = U_ILLEGAL_CHAR_FOUND;
00479                   }
00480 
                      /* lead byte first, then trail byte */
00481                   args->converter->invalidCharBuffer[0] = (char) (mySourceChar >> 8);
00482                   args->converter->invalidCharBuffer[1] = (char) mySourceChar;
00483                   args->converter->invalidCharLength = 2;
00484                   
00485                   args->target = myTarget + myTargetIndex;
00486                   args->source = mySource + mySourceIndex;
00487 
00488                   /* to do hsys: add more smarts to the codeUnits and length later */
00489                   ToU_CALLBACK_MACRO(args->converter->toUContext,
00490                                      args,
00491                                      args->converter->invalidCharBuffer,
00492                                      args->converter->invalidCharLength, 
00493                                      reason,
00494                                      err);
00495                   /* Hsys: calculate the source and target advancement */
00496                   args->source = saveSource;
00497                   args->target = saveTarget;
00498                   args->offsets = saveOffsets;
00499                   if (U_FAILURE (*err)) break;
00500                   args->converter->invalidCharLength = 0;
00501                 }
00502             }
00503         }
00504       else
00505         {
              /* source bytes remain but the target is full */
00506           *err = U_BUFFER_OVERFLOW_ERROR;
00507           break;
00508         }
00509     }
00510 
00511   /*If at the end of conversion we are still carrying state information
00512    *flush is TRUE, we can deduce that the input stream is truncated
00513    */
00514   if ((args->flush == TRUE)
00515       && (mySourceIndex == sourceLength)
00516       && (args->converter->toUnicodeStatus != 0x00))
00517     {
00518        
00519       if (U_SUCCESS(*err)) 
00520         {
00521           *err = U_TRUNCATED_CHAR_FOUND;
00522           args->converter->toUnicodeStatus = 0x00;
00523         }
00524     }
00525 
      /* report progress back to the caller */
00526   args->target += myTargetIndex;
00527   args->source += mySourceIndex;
00528 
00529   return;
00530 }
00531 
/*
 * T_UConverter_fromUnicode_DBCS: convert UChars to double-byte output.
 *
 * Reads UChars from args->source (up to args->sourceLimit) and writes two
 * bytes per mapped UChar (high byte first) to args->target (up to
 * args->targetLimit). Each UChar is looked up with ucmp16_getu() in
 * dbcs.fromUnicode; the value missingCharMarker (defined elsewhere) means
 * "no mapping". In that case dbcs.fromUnicodeFallback is tried if
 * UCNV_FROM_U_USE_FALLBACK() holds for the UChar and the static data has
 * hasFromUnicodeFallback == TRUE.
 *
 * If only the first of the two bytes fits into the target, the second byte
 * is stored in converter->charErrorBuffer and *err is set to
 * U_BUFFER_OVERFLOW_ERROR; the loop itself only ends on the next iteration,
 * through the target-full branch or because the source is exhausted.
 *
 * If the UChar is still unmapped it is stored in
 * converter->invalidUCharBuffer and *err is set to U_INVALID_CHAR_FOUND.
 * A lead surrogate gets extra handling:
 *   - followed by a trail surrogate: both are buffered and consumed;
 *   - followed by something else: the reason becomes UCNV_ILLEGAL;
 *   - last UChar of the input with args->flush TRUE: UCNV_ILLEGAL and
 *     U_TRUNCATED_CHAR_FOUND;
 *   - last UChar of the input without flush: stored in
 *     converter->fromUSurrogateLead.
 * FromU_CALLBACK_MACRO is invoked only while converter->fromUSurrogateLead
 * is 0.
 *
 * On return args->source and args->target are advanced by the local indices.
 *
 * NOTE(review): when a lead surrogate is stashed in fromUSurrogateLead,
 * *err is left at U_INVALID_CHAR_FOUND, and nothing in this function reads
 * or clears fromUSurrogateLead again - confirm this is handled by the caller.
 */
00532 U_CFUNC void   T_UConverter_fromUnicode_DBCS (UConverterFromUnicodeArgs * args,
00533                                       UErrorCode * err)
00534 {
00535   const UChar *mySource = args->source;
00536   unsigned char *myTarget = (unsigned char *) args->target;
00537   int32_t mySourceIndex = 0;
00538   int32_t myTargetIndex = 0;
00539   int32_t targetLength = args->targetLimit - (char *) myTarget;
00540   int32_t sourceLength = args->sourceLimit - mySource;
00541   CompactShortArray *myFromUnicode = NULL, *myFromUnicodeFallback = NULL;
00542   UChar targetUniChar = 0x0000;
00543   UChar mySourceChar = 0x0000;
00544   UConverterCallbackReason reason;
00545 
00546   myFromUnicode = &args->converter->sharedData->table->dbcs.fromUnicode;
00547   myFromUnicodeFallback = &args->converter->sharedData->table->dbcs.fromUnicodeFallback;
00548 
00549   /*writing the char to the output stream */
00550   while (mySourceIndex < sourceLength)
00551     {
00552 
00553       if (myTargetIndex < targetLength)
00554         {
00555           mySourceChar = (UChar) mySource[mySourceIndex++];
00556 
00557           /*Gets the corresponding codepoint */
00558           targetUniChar = (UChar) ucmp16_getu (myFromUnicode, mySourceChar);
00559           if (targetUniChar != missingCharMarker)
00560             {
00561               /*writes the char to the output stream */
00562               myTarget[myTargetIndex++] = (char) (targetUniChar >> 8);
00563               if (myTargetIndex < targetLength)
00564                 {
00565                   myTarget[myTargetIndex++] = (char) targetUniChar;
00566                 }
00567               else
00568                 {
                      /* no room for the low byte: park it in the converter's error buffer */
00569                   args->converter->charErrorBuffer[0] = (char) targetUniChar;
00570                   args->converter->charErrorBufferLength = 1;
00571                   *err = U_BUFFER_OVERFLOW_ERROR;
00572                 }
00573             }
00574           else if (UCNV_FROM_U_USE_FALLBACK(args->converter, mySourceChar) &&
00575                   (args->converter->sharedData->staticData->hasFromUnicodeFallback == TRUE))
00576           {
00577 
00578               targetUniChar = (UChar) ucmp16_getu (myFromUnicodeFallback, mySourceChar);
00579               if (targetUniChar != missingCharMarker)
00580                 {
00581                     /*writes the char to the output stream */
00582                     myTarget[myTargetIndex++] = (char) (targetUniChar >> 8);
00583                     if (myTargetIndex < targetLength)
00584                       {
00585                         myTarget[myTargetIndex++] = (char) targetUniChar;
00586                       }
00587                     else
00588                       {
                            /* no room for the low byte: park it in the converter's error buffer */
00589                         args->converter->charErrorBuffer[0] = (char) targetUniChar;
00590                         args->converter->charErrorBufferLength = 1;
00591                         *err = U_BUFFER_OVERFLOW_ERROR;
00592                       }
00593                 }
00594           }
              /* still unmapped after the optional fallback lookup */
00595           if (targetUniChar == missingCharMarker)  
00596           {
00597               *err = U_INVALID_CHAR_FOUND;
00598               reason = UCNV_UNASSIGNED;
00599               
00600               args->converter->invalidUCharBuffer[0] = (UChar)mySource[mySourceIndex - 1];
00601               args->converter->invalidUCharLength = 1;
00602               if (UTF_IS_LEAD(mySource[mySourceIndex-1]))
00603               {
00604                   /*if (mySource < args->sourceLimit) */
00605                   if(mySourceIndex < sourceLength)
00606                   {
00607                       if (UTF_IS_TRAIL(mySource[mySourceIndex]))
00608                       {
                              /* complete pair: buffer and consume the trail surrogate too */
00609                           args->converter->invalidUCharBuffer[1] = (UChar)mySource[mySourceIndex];
00610                           args->converter->invalidUCharLength++;
00611                           mySourceIndex++;
00612                       }
00613                       else 
00614                       {
00615                           reason = UCNV_ILLEGAL;
00616                       }                          
00617                   }
00618                   else if (args->flush == TRUE)
00619                   {
00620                       reason = UCNV_ILLEGAL;
00621                       *err = U_TRUNCATED_CHAR_FOUND;
00622                   } 
00623                   else 
00624                   {
00625                       args->converter->fromUSurrogateLead = args->converter->invalidUCharBuffer[0];
00626                       /* do not call the callback */
00627                   }
00628               }
00629               if (args->converter->fromUSurrogateLead == 0) 
00630               {
00631                   /* Needed explicit cast for myTarget on MVS to make compiler happy - JJD */
00632                   /* Check if we have encountered a surrogate pair.  If first UChar is lead byte
00633                    and second UChar is trail byte, it's a surrogate char.  If UChar is lead byte 
00634                    but second UChar is not trail byte, it's illegal sequence.  If neither, it's
00635                    plain unassigned code point.*/
                      /* args fields are saved here and restored after the callback */
00636                   const UChar *saveSource = args->source;
00637                   char *saveTarget = args->target;
00638                   int32_t *saveOffsets = args->offsets;
00639                   args->target = (char*)myTarget + myTargetIndex;
00640                   args->source = mySource + mySourceIndex;
00641                   FromU_CALLBACK_MACRO(args->converter->fromUContext,
00642                                          args,
00643                                          args->converter->invalidUCharBuffer,
00644                                          args->converter->invalidUCharLength,
00645                                          (UChar32) (args->converter->invalidUCharLength == 2 ? 
00646                                              UTF16_GET_PAIR_VALUE(args->converter->invalidUCharBuffer[0], 
00647                                                                   args->converter->invalidUCharBuffer[1]) 
00648                                                     : args->converter->invalidUCharBuffer[0]),
00649                                          reason,
00650                                          err);
00651                   args->source = saveSource;
00652                   args->target = saveTarget;
00653                   args->offsets = saveOffsets;
00654                   if (U_FAILURE (*err))
00655                     {
00656                       break;
00657                     }
00658                   args->converter->invalidUCharLength = 0;
00659               }
00660             }
00661         }
00662       else
00663         {
              /* source UChars remain but the target is full */
00664           *err = U_BUFFER_OVERFLOW_ERROR;
00665           break;
00666         }
00667     }
00668 
      /* report progress back to the caller */
00669   args->target += myTargetIndex;
00670   args->source += mySourceIndex;;
00671 
00672 
00673   return;
00674 }
00675 
/*
 * T_UConverter_getNextUChar_DBCS: decode the next double-byte character.
 *
 * Returns 0xffff immediately if *err already indicates failure. If fewer
 * than two bytes remain before args->sourceLimit, returns 0xffff without
 * consuming anything and sets *err to U_INDEX_OUTOFBOUNDS_ERROR (no bytes
 * left) or U_TRUNCATED_CHAR_FOUND (exactly one byte left).
 *
 * Otherwise both bytes are consumed, combined lead-byte-first into a 16-bit
 * value and looked up with ucmp16_getu() in dbcs.toUnicode; a value below
 * 0xfffe is returned directly.
 *
 * For 0xfffe/0xffff the dbcs.toUnicodeFallback array is tried with the same
 * two bytes when UCNV_TO_U_USE_FALLBACK(args->converter) holds and the
 * static data has hasToUnicodeFallback == TRUE. If there is still no
 * mapping, *err is set to U_INVALID_CHAR_FOUND (0xfffe) or
 * U_ILLEGAL_CHAR_FOUND (0xffff) and the converter's fromCharErrorBehaviour
 * callback is called with args->target pointing at the local result
 * variable, so the callback can supply a single replacement UChar. A
 * U_BUFFER_OVERFLOW_ERROR left by the callback is reset to U_ZERO_ERROR,
 * and the local result variable is returned.
 *
 * Fix relative to the previous revision: the fallback lookup used
 * *(args->source) as the lead byte although args->source had already been
 * advanced past the pair (reading the byte after the character, possibly
 * beyond sourceLimit), and a successful fallback advanced args->source by
 * two a second time, skipping the following character.
 */
00676 U_CFUNC UChar32 T_UConverter_getNextUChar_DBCS(UConverterToUnicodeArgs* args,
00677                                                UErrorCode* err)
00678 {
00679   UChar myUChar;
00680   
00681   if (U_FAILURE(*err)) return 0xffff;
00682   /*Checks boundaries and set appropriate error codes*/
00683   if (args->source+2 > args->sourceLimit) 
00684     {
00685       if (args->source >= args->sourceLimit)
00686         {
00687           /*Either caller has reached the end of the byte stream*/
00688           *err = U_INDEX_OUTOFBOUNDS_ERROR;
00689         }
00690       else if ((args->source+1) == args->sourceLimit)
00691         {
00692           /* a character was cut in half*/
00693           *err = U_TRUNCATED_CHAR_FOUND;
00694         }
00695       
00696       return 0xffff;
00697     }
00698 
00699   /*Gets the corresponding codepoint*/
00700   myUChar = ucmp16_getu((&args->converter->sharedData->table->dbcs.toUnicode),
00701                         (uint16_t)(((UChar)((*(args->source))) << 8) |((uint8_t)*(args->source+1))));
00702   
00703   /*update the input pointer*/
00704   args->source += 2;
00705   if (myUChar < 0xfffe) return myUChar;
00706   else
00707     {      
00708       UChar* myUCharPtr = &myUChar;
00709       UConverterCallbackReason reason;
00710 
00711       /* Do the fallback stuff */
00712       if (UCNV_TO_U_USE_FALLBACK(args->converter) &&
00713           (args->converter->sharedData->staticData->hasToUnicodeFallback == TRUE))
00714       {
              /* args->source already points past the pair:
               * the lead byte is at source-2 and the trail byte at source-1 */
00715           UChar fallbackUChar = ucmp16_getu((&args->converter->sharedData->table->dbcs.toUnicodeFallback),
00716                             (uint16_t)(((UChar)((*(args->source-2))) << 8) |((uint8_t)*(args->source-1))));
00717           if (fallbackUChar < 0xfffe)
00718           {
00719               /* both bytes were consumed above; do not advance the source again */
00720               return fallbackUChar;
00721           }
00722       }
00723       
00724       if (myUChar == 0xfffe)
00725       {
00726         reason = UCNV_UNASSIGNED;
00727         *err = U_INVALID_CHAR_FOUND;
00728       }
00729       else
00730       {
00731         reason = UCNV_ILLEGAL;
00732         *err = U_ILLEGAL_CHAR_FOUND;
00733       }
00734 
          /* give the callback room for exactly one UChar: the local result */
00735       args->target = myUCharPtr;
00736       args->targetLimit = myUCharPtr + 1;
00737       /*It is very likely that the ErrorFunctor will write to the
00738        *internal buffers */
00739       args->converter->fromCharErrorBehaviour(args->converter->toUContext,
00740                                     args,
00741                                     args->source - 2,
00742                                     2,
00743                                     reason,
00744                                     err);
00745       /*makes the internal caching transparent to the user*/
00746       if (*err == U_BUFFER_OVERFLOW_ERROR) *err = U_ZERO_ERROR;
00747 
00748       return myUChar;
00749     }
00750 } 
00751 
/*
 * Implementation table for the DBCS converter type (UCNV_DBCS).
 * This is a positional initializer: the meaning of each slot comes from the
 * UConverterImpl declaration, which is not in this file. The entries that
 * are set are the load/unload functions and the toUnicode, fromUnicode and
 * getNextUChar conversion functions defined above; every other slot is NULL.
 */
00752 static const UConverterImpl _DBCSImpl={
00753     UCNV_DBCS,
00754 
00755     _DBCSLoad,
00756     _DBCSUnload,
00757 
00758     NULL,
00759     NULL,
00760     NULL,
00761 
00762     T_UConverter_toUnicode_DBCS,
00763     NULL,
00764     T_UConverter_fromUnicode_DBCS,
00765     NULL,
00766     T_UConverter_getNextUChar_DBCS,
00767 
00768     NULL,
00769     NULL
00770 };
00771 
00772 
00773 /* Static data is in tools/makeconv/ucnvstat.c for data-based
00774  * converters. Be sure to update it as well.
00775  */
00776 
/*
 * Shared-data record for the DBCS converter type; it refers to _DBCSImpl.
 * Positional initializer: the slot meanings come from the
 * UConverterSharedData declaration, which is not in this file.
 * NOTE(review): the first slot looks like a structure-size field - confirm
 * against the declaration.
 */
00777 const UConverterSharedData _DBCSData={
00778     sizeof(UConverterSharedData), 1,
00779     NULL, NULL, NULL, FALSE, &_DBCSImpl, 
00780     0, /* tounicodestatus */
00781 };

Generated at Tue Dec 5 10:48:04 2000 for ICU by doxygen1.2.3 written by Dimitri van Heesch, © 1997-2000