Main Page   Class Hierarchy   Alphabetical List   Compound List   File List   Compound Members  

ucnvhz.c

00001 /*  
00002 **********************************************************************
00003 *   Copyright (C) 2000, International Business Machines
00004 *   Corporation and others.  All Rights Reserved.
00005 **********************************************************************
00006 *   file name:  ucnvhz.c
00007 *   encoding:   US-ASCII
00008 *   tab size:   8 (not used)
00009 *   indentation:4
00010 *
00011 *   created on: 2000oct16
00012 *   created by: Ram Viswanadha
00013 *   10/31/2000  Ram     Implemented offsets logic function
00014 *   
00015 */
00016 
00017 #include "unicode/utypes.h"
00018 #include "cmemory.h"
00019 #include "ucmp16.h"
00020 #include "ucmp8.h"
00021 #include "unicode/ucnv_err.h"
00022 #include "ucnv_bld.h"
00023 #include "unicode/ucnv.h"
00024 #include "ucnv_cnv.h"
00025 #include "unicode/ustring.h"
00026 #include "cstring.h"
00027 
00028 #define UCNV_TILDE 0x7E          /* ~ */
00029 #define UCNV_OPEN_BRACE 0x7B     /* { */
00030 #define UCNV_CLOSE_BRACE 0x7D   /* } */
00031 #define SB_ESCAPE   "\x7E\x7D"   /* "~}": written by the from-Unicode path when shifting to single-byte mode */
00032 #define DB_ESCAPE   "\x7E\x7B"   /* "~{": written by the from-Unicode path when shifting to double-byte mode */
00033 /*
       * Early-exit helper used inside the from-Unicode loop.
       * When *err is U_BUFFER_OVERFLOW_ERROR it advances args->target and args->source
       * by the given indexes, zeroes the sourceIndex/targetIndex fields of
       * myConverterData, stores isTargetUCharDBCS in args->converter->fromUnicodeStatus
       * and then RETURNS from the enclosing function (which must therefore return void).
       * NOTE(review): the arguments are expanded without parentheses and the body is a
       * bare brace block rather than do { } while (0); pass plain identifiers only.
       */
00034 #define TEST_ERROR_CONDITION(args,myTargetIndex, mySourceIndex, isTargetUCharDBCS, myConverterData, err){ \
00035     if(*err ==U_BUFFER_OVERFLOW_ERROR){ \
00036     /*save the state and return */ \
00037     args->target += myTargetIndex; \
00038     args->source += mySourceIndex; \
00039     myConverterData->sourceIndex = 0; \
00040     myConverterData->targetIndex = 0; \
00041     args->converter->fromUnicodeStatus = isTargetUCharDBCS; \
00042     return; \
00043     } \
00044 }
00045 
00046 /*********** HZ Converter Protos ***********/
00047 static void _HZOpen(UConverter *cnv, const char *name, const char *locale, uint32_t options,UErrorCode *errorCode);
00048 static void _HZClose(UConverter *converter);
00049 static void _HZReset(UConverter *converter);
00050 /* HZ bytes -> UChars, no offsets recorded */
00051 U_CFUNC void UConverter_toUnicode_HZ(UConverterToUnicodeArgs *args,
00052                                              UErrorCode *err);
00053 /* HZ bytes -> UChars, also filling args->offsets */
00054 U_CFUNC void UConverter_toUnicode_HZ_OFFSETS_LOGIC (UConverterToUnicodeArgs *args,
00055                                                             UErrorCode *err);
00056 /* UChars -> HZ bytes */
00057 U_CFUNC void UConverter_fromUnicode_HZ(UConverterFromUnicodeArgs *args,
00058                                                UErrorCode *err);
00059 /* UChars -> HZ bytes, OFFSETS_LOGIC variant (body not visible in this view) */
00060 U_CFUNC void UConverter_fromUnicode_HZ_OFFSETS_LOGIC (UConverterFromUnicodeArgs *args,
00061                                                               UErrorCode *err);
00062 /* returns one code point per call (body not visible in this view) */
00063 U_CFUNC UChar32 UConverter_getNextUChar_HZ (UConverterToUnicodeArgs *pArgs,
00064                                                     UErrorCode *pErrorCode);   
00065 /*
       * Function table for the HZ converter: the converter type tag, the
       * open/close/reset hooks and the five conversion entry points declared above.
       * NOTE(review): the meaning of each positional slot, including the NULL ones,
       * is fixed by the UConverterImpl declaration, which is not part of this file -
       * confirm against that declaration before reordering anything here.
       */
00066 static UConverterImpl _HZImpl={
00067     UCNV_HZ,
00068     
00069     NULL,
00070     NULL,
00071     
00072     _HZOpen,
00073     _HZClose,
00074     _HZReset,
00075     
00076     UConverter_toUnicode_HZ,
00077     UConverter_toUnicode_HZ_OFFSETS_LOGIC,
00078     UConverter_fromUnicode_HZ,
00079     UConverter_fromUnicode_HZ_OFFSETS_LOGIC,
00080     UConverter_getNextUChar_HZ,
00081     
00082     NULL,
00083     NULL
00084 };
00085 /*
       * Static description of the HZ converter (name "HZ", type UCNV_HZ).
       * NOTE(review): the initializers are positional and their meaning depends on the
       * UConverterStaticData declaration, which is not visible here. The { 0x1a, ... }, 1
       * pair looks like a one-byte default substitution sequence - TODO confirm.
       */
00086 const UConverterStaticData _HZStaticData={
00087     sizeof(UConverterStaticData),
00088         "HZ",
00089         2023, UCNV_IBM, UCNV_HZ, 1, 4,
00090     { 0x1a, 0, 0, 0 },1, FALSE, FALSE,
00091     { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0} /* reserved */
00092 };
00093             
00094 /*
       * Shared-data record that ties _HZStaticData to the _HZImpl function table.
       * NOTE(review): the other fields are positional; see the UConverterSharedData
       * declaration (not visible here) for what ~0, NULL, FALSE and 0 stand for.
       */
00095 const UConverterSharedData _HZData={
00096     sizeof(UConverterSharedData), ~((uint32_t) 0),
00097         NULL, NULL, &_HZStaticData, FALSE, &_HZImpl, 
00098         0
00099 };
00100 /* Per-converter HZ state; allocated in _HZOpen and stored in UConverter.extraInfo. */
00101 typedef struct{
00102     int32_t targetIndex;        /* zeroed by open/reset and by TEST_ERROR_CONDITION */
00103     int32_t sourceIndex;        /* zeroed by open/reset and by TEST_ERROR_CONDITION */
00104     UBool isEscapeAppended;     /* from-Unicode: set TRUE once a mode escape has been written */
00105     UConverter* gbConverter;    /* "ibm-1386" converter used for the byte <-> Unicode mapping */
00106     UBool isStateDBCS;          /* to-Unicode: TRUE after "~{", FALSE again after "~}" */
00107     UBool isTargetUCharDBCS;    /* from-Unicode: TRUE when the last mapped value was above 0x00FF */
00108 }UConverterDataHZ;
00109 
00110 
00111 /*
       * Open hook for the HZ converter.
       * Clears the generic conversion state on cnv, allocates the HZ-specific
       * UConverterDataHZ block into cnv->extraInfo, opens the "ibm-1386" converter
       * used for the actual mapping, and installs "~}" + 0x1A as substitution bytes.
       *
       * name, locale and options are accepted for the hook signature but not used.
       * errorCode receives U_MEMORY_ALLOCATION_ERROR when the state block cannot be
       * allocated, and otherwise whatever ucnv_open / ucnv_setSubstChars report.
       * NOTE(review): if ucnv_open fails, gbConverter is stored as returned (possibly
       * NULL); callers must check errorCode before converting.
       */
00112 static void _HZOpen(UConverter *cnv, const char *name,const char *locale,uint32_t options, UErrorCode *errorCode){
00113     cnv->toUnicodeStatus = 0;
00114     cnv->fromUnicodeStatus= 0;
00115     cnv->mode=0;
00116     cnv->fromUSurrogateLead = 0x0000;   /* was a no-op expression statement */
00117     cnv->extraInfo = uprv_malloc (sizeof (UConverterDataHZ));
00118     if(cnv->extraInfo != NULL){
00119         ((UConverterDataHZ*)cnv->extraInfo)->gbConverter = ucnv_open("ibm-1386",errorCode);
00120         ((UConverterDataHZ*)cnv->extraInfo)->isStateDBCS = FALSE;
00121         ((UConverterDataHZ*)cnv->extraInfo)->isEscapeAppended = FALSE;
00122         ((UConverterDataHZ*)cnv->extraInfo)->targetIndex = 0;
00123         ((UConverterDataHZ*)cnv->extraInfo)->sourceIndex = 0;
00124         ((UConverterDataHZ*)cnv->extraInfo)->isTargetUCharDBCS = FALSE;
00125          ucnv_setSubstChars(cnv,"\x7E\x7D\x1A", 3, errorCode);
00126     }
00127     else{
00128         /* report the failed allocation instead of returning silently */
              *errorCode = U_MEMORY_ALLOCATION_ERROR;
          }
00129 }
00130 static void _HZClose(UConverter *cnv){
00131     /*
           * Close hook: releases the nested GB converter and the HZ state block.
           * extraInfo is NULL when the allocation in _HZOpen failed, so it is
           * checked before use and cleared afterwards to avoid a dangling pointer.
           */
00132     if(cnv->extraInfo != NULL){
00133         ucnv_close (((UConverterDataHZ *) (cnv->extraInfo))->gbConverter);
00134         uprv_free(cnv->extraInfo);
              cnv->extraInfo = NULL;
          }
00135 }
00136 static void _HZReset(UConverter *cnv){
          /*
           * Reset hook: returns the converter to its initial state (no pending '~',
           * no pending lead byte or lead surrogate, single-byte mode, no escape
           * written yet). The nested GB converter is left untouched.
           */
00137     cnv->toUnicodeStatus = 0;
00138     cnv->fromUnicodeStatus= 0;
00139     cnv->mode=0;
00140     cnv->fromUSurrogateLead = 0x0000;   /* was a no-op expression statement */
00141     if(cnv->extraInfo != NULL){
00142         ((UConverterDataHZ*)cnv->extraInfo)->isStateDBCS = FALSE;
00143         ((UConverterDataHZ*)cnv->extraInfo)->isEscapeAppended = FALSE;
00144         ((UConverterDataHZ*)cnv->extraInfo)->targetIndex = 0;
00145         ((UConverterDataHZ*)cnv->extraInfo)->sourceIndex = 0;
00146         ((UConverterDataHZ*)cnv->extraInfo)->isTargetUCharDBCS = FALSE;
00147     }
00148 }
00149 
00150 /**************************************HZ Encoding*************************************************
00151 * Rules for HZ encoding
00152 * 
00153 *   In ASCII mode, a byte is interpreted as an ASCII character, unless a
00154 *   '~' is encountered. The character '~' is an escape character. By
00155 *   convention, it must be immediately followed ONLY by '~', '{' or '\n'
00156 *   (<LF>), with the following special meaning.
00157 
00158 *   1. The escape sequence '~~' is interpreted as a '~'.
00159 *   2. The escape-to-GB sequence '~{' switches the mode from ASCII to GB.
00160 *   3. The escape sequence '~\n' is a line-continuation marker to be
00161 *     consumed with no output produced.
00162 *   In GB mode, characters are interpreted two bytes at a time as (pure)
00163 *   GB codes until the escape-from-GB code '~}' is read. This code
00164 *   switches the mode from GB back to ASCII.  (Note that the escape-
00165 *   from-GB code '~}' ($7E7D) is outside the defined GB range.)
00166 *
00167 *   Source: RFC 1842
00168 */
00169 /*
       * Converts HZ bytes in [args->source, args->sourceLimit) into UChars written to
       * [args->target, args->targetLimit). No offsets are recorded by this variant.
       *
       * State carried between calls:
       *   args->converter->mode            - UCNV_TILDE while a '~' is pending, else 0
       *   args->converter->toUnicodeStatus - first byte of a pending double-byte pair, else 0
       *   myData->isStateDBCS              - TRUE between "~{" and "~}"
       *
       * Bytes are mapped through the GB converter held in the HZ state block; in
       * double-byte mode 0x80 is added to both bytes before the lookup.
       *
       * On return args->source and args->target are advanced past what was consumed
       * and produced. *err is set to:
       *   U_ILLEGAL_ARGUMENT_ERROR  - NULL converter or inverted source/target limits
       *   U_BUFFER_OVERFLOW_ERROR   - target full while source bytes remain
       *   U_INVALID_CHAR_FOUND / U_ILLEGAL_CHAR_FOUND - unmappable / malformed input; the
       *                               to-Unicode callback runs and conversion stops only
       *                               if *err is still a failure afterwards
       *   U_TRUNCATED_CHAR_FOUND    - flush requested with half a pair pending
       * When the source is exhausted and args->flush is set the converter is reset.
       */
00170 U_CFUNC void UConverter_toUnicode_HZ(UConverterToUnicodeArgs *args,
00171                                               UErrorCode* err){
00172     char tempBuf[3];
00173     const char* pBuf;
00174     const char *mySource = ( char *) args->source;
00175     UChar *myTarget = args->target;
00176     char *tempLimit = &tempBuf[2]+1; 
          /* NOTE(review): the two indexes below are not used by the visible code; they are
           * kept because the callback macros are defined elsewhere and may name locals. */
00177     int32_t mySourceIndex = 0;
00178     int32_t myTargetIndex = 0;
00179     const char *mySourceLimit = args->sourceLimit;
00180     UChar32 targetUniChar = 0x0000;
00181     UChar mySourceChar = 0x0000;
00182     UConverterDataHZ* myData;
00183     
00184     
00185     /*Arguments Check*/
00186     if (U_FAILURE(*err)) 
00187         return;
00188     
00189     if ((args->converter == NULL) || (args->targetLimit < args->target) || (args->sourceLimit < args->source)){
00190         *err = U_ILLEGAL_ARGUMENT_ERROR;
00191         return;
00192     }
00193     /* read the HZ state only after the converter pointer has been validated */
          myData=(UConverterDataHZ*)(args->converter->extraInfo);
          
00194     while(mySource< args->sourceLimit){
00195         
00196         if(myTarget < args->targetLimit){
00197             
00198             mySourceChar= (unsigned char) *mySource++;
00199             
00200             /*if( mySourceChar <= 0x20){
00201                 myData->isStateDBCS = FALSE;
00202             }*/
00203 
00204             switch(mySourceChar){
00205                 case 0x0A:
                          /* NOTE(review): the LF of a "~\n" pair is still emitted here,
                           * although the rules quoted in this file say the pair produces
                           * no output - left unchanged, confirm intended behaviour. */
00206                     if(args->converter->mode ==UCNV_TILDE){
00207                         args->converter->mode=0;
00208                         
00209                     }
00210                     *(myTarget++)=(UChar)mySourceChar;
00211                     continue;
00212             
00213                 case UCNV_TILDE:
00214                     if(args->converter->mode ==UCNV_TILDE){
                              /* "~~" stands for a literal '~' */
00215                         *(myTarget++)=(UChar)mySourceChar;
00216                         args->converter->mode=0;
00217                         continue;
00218                         
00219                     }
00220                     else if(args->converter->toUnicodeStatus !=0){
                              /* '~' is the second byte of a pending pair: treat as data */
00221                         args->converter->mode=0;
00222                         break;
00223                     }
00224                     else{
00225                         args->converter->mode = UCNV_TILDE;
00226                         continue;
00227                     }
00228                 
00229                 
00230                 case UCNV_OPEN_BRACE:
00231                     if(args->converter->mode == UCNV_TILDE){
                              /* "~{" : enter double-byte mode */
00232                         args->converter->mode=0;
00233                         myData->isStateDBCS = TRUE;
00234                         continue;
00235                     }
00236                     else{
00237                         break;
00238                     }
00239                
00240                 
00241                 case UCNV_CLOSE_BRACE:
00242                     if(args->converter->mode == UCNV_TILDE){
                              /* "~}" : back to single-byte mode */
00243                         args->converter->mode=0;
00244                         myData->isStateDBCS = FALSE;
00245                         continue;
00246                     }
00247                     else{
00248                         break;
00249                     }
00250                 
00251                 default: 
00252                     /* if the first byte is equal to TILDE and the trail byte
00253                      * is not a valid byte then it is an error condition
00254                      */
00255                     if(args->converter->mode == UCNV_TILDE){
00256                         args->converter->mode=0;
                              /* SAVE_STATE reads tempBuf in double-byte mode; fill it with
                               * this escape so no stale or uninitialised bytes are reported */
                              tempBuf[0] = (char) (UCNV_TILDE+0x80);
                              tempBuf[1] = (char) (mySourceChar+0x80);
00257                         mySourceChar= (UChar)(((UCNV_TILDE+0x80) << 8) | ((mySourceChar & 0x00ff)+0x80));
                              /* do not let a value left over from an earlier character
                               * choose the callback reason: this is an illegal sequence */
                              targetUniChar = 0xffff;
00258                         goto SAVE_STATE;
00259                     }
00260                     
00261                     break;
00262 
00263             }
00264              
00265             if(myData->isStateDBCS){
00266                 if(args->converter->toUnicodeStatus == 0x00){
                          /* first byte of a pair: remember it and wait for the second */
00267                     args->converter->toUnicodeStatus = (UChar) mySourceChar;
00268                     continue;
00269                 }
00270                 else{
00271                     tempBuf[0] =        (char) (args->converter->toUnicodeStatus +0x80);
00272                     tempBuf[1] =        (char) (mySourceChar+0x80);
00273                     mySourceChar= (UChar)(((args->converter->toUnicodeStatus+0x80) << 8) | ((mySourceChar & 0x00ff)+0x80));
00274                     args->converter->toUnicodeStatus =0x00;
00275                     pBuf = &tempBuf[0];
00276                     tempLimit = &tempBuf[2]+1;
00277                     targetUniChar = _MBCSSimpleGetNextUChar(myData->gbConverter->sharedData,
00278                         &pBuf,tempLimit,args->converter->useFallback);
00279                 }
00280             }
00281             else{
                      /* NOTE(review): this to-Unicode path tests fromUnicodeStatus;
                       * kept as found - confirm which status field was intended. */
00282                 if(args->converter->fromUnicodeStatus == 0x00){
00283                     tempBuf[0] = (char) mySourceChar;
00284                     pBuf = &tempBuf[0];
00285                     tempLimit = &tempBuf[1];
00286                     targetUniChar = _MBCSSimpleGetNextUChar(myData->gbConverter->sharedData,
00287                         &pBuf,tempLimit,args->converter->useFallback);
00288                 }
00289                 else{
                          /* same reason as above: never reuse a stale lookup result */
                          targetUniChar = 0xffff;
00290                     goto SAVE_STATE;
00291                 }
00292 
00293             }
00294             if(targetUniChar < 0xfffe){
00295                 *(myTarget++)=(UChar)targetUniChar;
00296             }
00297             else if(targetUniChar>=0xfffe){
00298 SAVE_STATE:
00299                 {
00300                     const char *saveSource = args->source;
00301                     UChar *saveTarget = args->target;
00302                     int32_t *saveOffsets = args->offsets;
00303                     UConverterCallbackReason reason;
00304                 
                          /* 0xfffe = unassigned, anything else here = illegal */
00305                     if(targetUniChar == 0xfffe){
00306                         reason = UCNV_UNASSIGNED;
00307                         *err = U_INVALID_CHAR_FOUND;
00308                     }
00309                     else{
00310                         reason = UCNV_ILLEGAL;
00311                         *err = U_ILLEGAL_CHAR_FOUND;
00312                     }
00313                     if(myData->isStateDBCS){
00314 
00315                         args->converter->invalidCharBuffer[args->converter->invalidCharLength++] = (char)(tempBuf[0]-0x80);
00316                         args->converter->invalidCharBuffer[args->converter->invalidCharLength++] = (char)(tempBuf[1]-0x80);                    
00317                     }
00318                     else{
00319                         args->converter->invalidCharBuffer[args->converter->invalidCharLength++] = (char)mySourceChar;
00320                     }
                          /* expose the current positions to the callback, then restore
                           * the caller's view and pick up whatever the callback wrote */
00321                     args->target = myTarget;
00322                     args->source = mySource;
00323                     ToU_CALLBACK_MACRO( args->converter->toUContext,
00324                         args,
00325                         args->converter->invalidCharBuffer,
00326                         args->converter->invalidCharLength,
00327                         reason,
00328                         err);
00329                     myTarget = args->target;
00330                     args->source  =     saveSource;
00331                     args->target  =     saveTarget;
00332                     args->offsets =     saveOffsets;
00333                     args->converter->invalidCharLength=0;
00334                     if(U_FAILURE(*err))
00335                         break;
00336 
00337                 }
00338             }
00339         }
00340         else{
00341             *err =U_BUFFER_OVERFLOW_ERROR;
00342             break;
00343         }
00344     }
00345     if((args->flush==TRUE)
00346         && (mySource == mySourceLimit) 
00347         && ( args->converter->toUnicodeStatus !=0x00)){
00348         if(U_SUCCESS(*err)){
00349             *err = U_TRUNCATED_CHAR_FOUND;
00350             args->converter->toUnicodeStatus = 0x00;
00351         }
00352     }
00353     /* Reset the state of converter if we consumed 
00354      * the source and flush is true
00355      */
00356     if( (mySource == mySourceLimit) && args->flush){
00357         _HZReset(args->converter);
00358     }
00359 
00360     args->target = myTarget;
00361     args->source = mySource;
00362 }
00363 
00364 /*
       * Same conversion as UConverter_toUnicode_HZ, but for every UChar written it also
       * stores in args->offsets (when that pointer is not NULL) the index, relative to
       * args->source, of the first source byte that produced it: the byte itself for
       * single-byte output and LF, the start of the two-byte sequence for double-byte
       * output and for "~~". The index can be negative when that first byte was
       * consumed by a previous call.
       *
       * State carried between calls:
       *   args->converter->mode            - UCNV_TILDE while a '~' is pending, else 0
       *   args->converter->toUnicodeStatus - first byte of a pending double-byte pair, else 0
       *   myData->isStateDBCS              - TRUE between "~{" and "~}"
       *
       * *err is set to U_ILLEGAL_ARGUMENT_ERROR (NULL converter or inverted limits),
       * U_BUFFER_OVERFLOW_ERROR (target full), U_INVALID_CHAR_FOUND /
       * U_ILLEGAL_CHAR_FOUND (bad input; the callback runs and conversion stops only if
       * *err is still a failure afterwards) or U_TRUNCATED_CHAR_FOUND (flush with half
       * a pair pending). When the source is exhausted and args->flush is set the
       * converter is reset.
       */
00365 U_CFUNC void UConverter_toUnicode_HZ_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,
00366                                                             UErrorCode* err){
00367     char tempBuf[3];
00368     const char* pBuf;
00369     const char *mySource = ( char *) args->source;
00370     UChar *myTarget = args->target;
00371     char *tempLimit = &tempBuf[3]; 
          /* NOTE(review): the two indexes below are not used by the visible code; they are
           * kept because the callback macros are defined elsewhere and may name locals. */
00372     int32_t mySourceIndex = 0;
00373     int32_t myTargetIndex = 0;
00374     const char *mySourceLimit = args->sourceLimit;
00375     UChar32 targetUniChar = 0x0000;
00376     UChar mySourceChar = 0x0000;
00377     UConverterDataHZ* myData;
00378     
00379     /*Arguments Check*/
00380     if (U_FAILURE(*err)) 
00381         return;
00382     
00383     if ((args->converter == NULL) || (args->targetLimit < args->target) || (args->sourceLimit < args->source)){
00384         *err = U_ILLEGAL_ARGUMENT_ERROR;
00385         return;
00386     }
00387     /* read the HZ state only after the converter pointer has been validated */
          myData=(UConverterDataHZ*)(args->converter->extraInfo);
          
00388     while(mySource< args->sourceLimit){
00389         
00390         if(myTarget < args->targetLimit){
00391             
00392             mySourceChar= (unsigned char) *mySource++;
00393             
00394             
00395             /*if( mySourceChar <= 0x20){
00396                 myData->isStateDBCS = FALSE;
00397             }*/
00398 
00399             switch(mySourceChar){
00400                 case 0x0A:
                          /* NOTE(review): the LF of a "~\n" pair is still emitted here,
                           * although the rules quoted in this file say the pair produces
                           * no output - left unchanged, confirm intended behaviour. */
00401                     if(args->converter->mode ==UCNV_TILDE){
00402                         args->converter->mode=0;
00403                         
00404                     }
                          /* every UChar written needs an offset entry */
                          if(args->offsets){
                              args->offsets[myTarget - args->target]=     mySource - args->source - 1;
                          }
00405                     *(myTarget++)=(UChar)mySourceChar;
00406                     continue;
00407             
00408                 case UCNV_TILDE:
00409                     if(args->converter->mode ==UCNV_TILDE){
                              /* "~~" stands for a literal '~'; it maps back to the
                               * first tilde of the pair */
                              if(args->offsets){
                                  args->offsets[myTarget - args->target]=     mySource - args->source - 2;
                              }
00410                         *(myTarget++)=(UChar)mySourceChar;
00411                         args->converter->mode=0;
00412                         continue;
00413                         
00414                     }
00415                     else if(args->converter->toUnicodeStatus !=0){
                              /* '~' is the second byte of a pending pair: treat as data */
00416                         args->converter->mode=0;
00417                         break;
00418                     }
00419                     else{
00420                         args->converter->mode = UCNV_TILDE;
00421                         continue;
00422                     }
00423                 
00424                 
00425                 case UCNV_OPEN_BRACE:
00426                     if(args->converter->mode == UCNV_TILDE){
                              /* "~{" : enter double-byte mode */
00427                         args->converter->mode=0;
00428                         myData->isStateDBCS = TRUE;
00429                         continue;
00430                     }
00431                     else{
00432                         break;
00433                     }
00434                
00435                 
00436                 case UCNV_CLOSE_BRACE:
00437                     if(args->converter->mode == UCNV_TILDE){
                              /* "~}" : back to single-byte mode */
00438                         args->converter->mode=0;
00439                          myData->isStateDBCS = FALSE;
00440                         continue;
00441                     }
00442                     else{
00443                         break;
00444                     }
00445                 
00446                 default:
00447                      /* if the first byte is equal to TILDE and the trail byte
00448                      * is not a valid byte then it is an error condition
00449                      */
00450                     if(args->converter->mode == UCNV_TILDE){
00451                         args->converter->mode=0;
                              /* SAVE_STATE reads tempBuf in double-byte mode; fill it with
                               * this escape so no stale or uninitialised bytes are reported */
                              tempBuf[0] = (char) (UCNV_TILDE+0x80);
                              tempBuf[1] = (char) (mySourceChar+0x80);
00452                         mySourceChar= (UChar)(((UCNV_TILDE+0x80) << 8) | ((mySourceChar & 0x00ff)+0x80));
                              /* do not let a value left over from an earlier character
                               * choose the callback reason: this is an illegal sequence */
                              targetUniChar = 0xffff;
00453                         goto SAVE_STATE;
00454                     }
00455                     
00456                     break;
00457 
00458             }
00459              
00460             if(myData->isStateDBCS){
00461                 if(args->converter->toUnicodeStatus == 0x00){
                          /* first byte of a pair: remember it and wait for the second */
00462                     args->converter->toUnicodeStatus = (UChar) mySourceChar;
00463                     continue;
00464                 }
00465                 else{
00466                     tempBuf[0] =        (char) (args->converter->toUnicodeStatus+0x80) ;
00467                     tempBuf[1] =        (char) (mySourceChar+0x80);
00468                     mySourceChar= (UChar)(((args->converter->toUnicodeStatus+0x80) << 8) | ((mySourceChar & 0x00ff)+0x80));
00469                     args->converter->toUnicodeStatus =0x00;
00470                     pBuf = &tempBuf[0];
00471                     tempLimit = &tempBuf[2]+1;
00472                     targetUniChar = _MBCSSimpleGetNextUChar(myData->gbConverter->sharedData,
00473                         &pBuf,tempLimit,args->converter->useFallback);
00474                 }
00475             }
00476             else{
                      /* NOTE(review): this to-Unicode path tests fromUnicodeStatus;
                       * kept as found - confirm which status field was intended. */
00477                 if(args->converter->fromUnicodeStatus == 0x00){
00478                     tempBuf[0] = (char) mySourceChar;
00479                     pBuf = &tempBuf[0];
00480                     tempLimit = &tempBuf[1];
00481                     targetUniChar = _MBCSSimpleGetNextUChar(myData->gbConverter->sharedData,
00482                         &pBuf,tempLimit,args->converter->useFallback);
00483                 }
00484                 else{
                          /* same reason as above: never reuse a stale lookup result */
                          targetUniChar = 0xffff;
00485                     goto SAVE_STATE;
00486                 }
00487 
00488             }
00489             if(targetUniChar < 0xfffe){
                      /* guarded like the error path below, which also allows NULL offsets */
                      if(args->offsets){
00490                     if(myData->isStateDBCS){
00491                         args->offsets[myTarget - args->target]=     mySource - args->source - 2;
00492                     }
00493                     else{
00494                         args->offsets[myTarget - args->target]=     mySource - args->source - 1;
00495                     }
                      }
00496                 *(myTarget++)=(UChar)targetUniChar;
00497             }
00498             else if(targetUniChar>=0xfffe){
00499 SAVE_STATE:
00500                 {
00501                    const char *saveSource = args->source;
00502                     UChar *saveTarget = args->target; 
00503                     int32_t *saveOffsets = args->offsets;
00504                     
00505                     UConverterCallbackReason reason;
                          /* NOTE(review): currentOffset and My_i are only assigned in the
                           * visible code; presumably the OFFSETS_LOGIC callback macro reads
                           * them by name - confirm before renaming or removing. */
00506                     int32_t currentOffset ;
00507                     int32_t My_i = myTarget - args->target;
00508                     
                          /* 0xfffe = unassigned, anything else here = illegal */
00509                     if(targetUniChar == 0xfffe){
00510                         reason = UCNV_UNASSIGNED;
00511                         *err = U_INVALID_CHAR_FOUND;
00512                     }
00513                     else{
00514                         reason = UCNV_ILLEGAL;
00515                         *err = U_ILLEGAL_CHAR_FOUND;
00516                     }
00517                     if(myData->isStateDBCS){
00518 
00519                         args->converter->invalidCharBuffer[args->converter->invalidCharLength++] = (char)(tempBuf[0]-0x80);
00520                         args->converter->invalidCharBuffer[args->converter->invalidCharLength++] = (char)(tempBuf[1]-0x80);
00521                         currentOffset=  mySource - args->source -2;
00522                     
00523                     }
00524                     else{
00525                         args->converter->invalidCharBuffer[args->converter->invalidCharLength++] = (char)mySourceChar;
00526                         currentOffset=  mySource - args->source -1;
00527                     }
                          /* expose the current positions to the callback, then restore
                           * the caller's view and pick up whatever the callback wrote */
00528                     args->offsets = args->offsets?args->offsets+(myTarget - args->target):0;
00529                     args->target = myTarget;
00530                     args->source = mySource;
00531                     myTarget = saveTarget;
00532                     ToU_CALLBACK_OFFSETS_LOGIC_MACRO( args->converter->toUContext,
00533                         args,
00534                         args->converter->invalidCharBuffer,
00535                         args->converter->invalidCharLength,
00536                         reason,
00537                         err);
00538                     args->converter->invalidCharLength=0;
00539                     args->source  =     saveSource;
00540                     myTarget = args->target;
00541                     args->target  =     saveTarget;
00542                     args->offsets =     saveOffsets;
00543                     if(U_FAILURE(*err))
00544                         break;
00545                 }
00546             }
00547         }
00548         else{
00549             *err =U_BUFFER_OVERFLOW_ERROR;
00550             break;
00551         }
00552     }
00553     if((args->flush==TRUE)
00554         && (mySource == mySourceLimit) 
00555         && ( args->converter->toUnicodeStatus !=0x00)){
00556         if(U_SUCCESS(*err)){
00557             *err = U_TRUNCATED_CHAR_FOUND;
00558             args->converter->toUnicodeStatus = 0x00;
00559         }
00560     }
00561     /* Reset the state of converter if we consumed 
00562      * the source and flush is true
00563      */
00564     if( (mySource == mySourceLimit) && args->flush){
00565          _HZReset(args->converter);
00566     }
00567 
00568     args->target = myTarget;
00569     args->source = mySource;
00570 }
00571 /*
       * Appends the first len bytes of strToAppend to args->target, starting at index
       * *targetIndex and never writing at or beyond *targetLength. Each byte written
       * also gets an offsets entry of *sourceIndex-1 when args->offsets is set, and
       * *targetIndex is advanced. Bytes that do not fit are appended to the
       * converter's charErrorBuffer instead and *err becomes U_BUFFER_OVERFLOW_ERROR.
       */
00572 static void concatEscape(UConverterFromUnicodeArgs* args, int32_t *targetIndex, int32_t *targetLength,
00573                          const  char* strToAppend,UErrorCode* err,int len,int32_t *sourceIndex){
00574     int pos;
00575     for(pos = 0; pos < len; ++pos){
00576         const unsigned char escByte = (unsigned char) strToAppend[pos];
00577         if(*targetIndex >= *targetLength){   /* target is full: park the byte */
00578             args->converter->charErrorBuffer[(int)args->converter->charErrorBufferLength++] = escByte;
00579             *err = U_BUFFER_OVERFLOW_ERROR;
00580             continue;
00581         }
00582         args->target[*targetIndex] = escByte;
00583         if(args->offsets != NULL){
00584             args->offsets[*targetIndex] = *sourceIndex - 1;
00585         }
00586         ++(*targetIndex);
00587     }
00588 }
00589 /*
       * Writes one mapped character (*strToAppend) to args->target at *targetIndex.
       * Values up to 0x00FF are written as one byte; larger values are written as two
       * bytes, high byte first, each with 0x80 subtracted. Every byte written gets an
       * offsets entry of *sourceIndex-1 when args->offsets is set and advances
       * *targetIndex. Bytes that do not fit below *targetLength are appended to the
       * converter's charErrorBuffer and *err becomes U_BUFFER_OVERFLOW_ERROR.
       *
       * The single-byte boundary (<= 0x00FF) matches the caller's "> 0x00FF means
       * double byte" classification, and the single-byte capacity test matches the
       * one used by concatEscape, so the last free target slot is used.
       */
00590 static void concatString(UConverterFromUnicodeArgs* args, int32_t *targetIndex, int32_t *targetLength,
00591                          const  UChar32* strToAppend,UErrorCode* err, int32_t *sourceIndex){
00592     
00593     if(*strToAppend <= 0x00FF){
00594         if(*targetIndex < *targetLength){
00595             args->target[*targetIndex] = (unsigned char) *strToAppend;
00596             
00597             if(args->offsets!=NULL){
00598                 args->offsets[*targetIndex] = *sourceIndex-1;
00599             }
00600             (*targetIndex)++;
00601             
00602         }else{
00603             args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (unsigned char) *strToAppend;
00604             *err = U_BUFFER_OVERFLOW_ERROR;
00605         }
00606     }
00607     else{
00608         if(*targetIndex < *targetLength){
00609             args->target[*targetIndex] =(unsigned char) ((*strToAppend>>8)-0x80 );
00610             if(args->offsets!=NULL){
00611                 args->offsets[*targetIndex] = *sourceIndex-1;
00612             }
00613             (*targetIndex)++;
00614             
00615             if(*targetIndex < *targetLength){
00616                 args->target[(*targetIndex)] =(unsigned char) ((*strToAppend & 0x00FF)-0x80);
00617                 
00618                 if(args->offsets!=NULL){
00619                     args->offsets[*targetIndex] = *sourceIndex-1;
00620                 }
00621                 (*targetIndex)++;
00622             }
00623             else{
                      /* only the trail byte overflowed */
00624                 args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (unsigned char) ((*strToAppend & 0x00FF)-0x80);
00625                 *err = U_BUFFER_OVERFLOW_ERROR;
00626                 
00627             }
00628             
00629         }
00630         else{
                  /* nothing fits: both bytes go to the overflow buffer. No offsets
                   * entry is written here, because *targetIndex is already at or past
                   * *targetLength and the store would land outside the offsets array. */
00631             args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (unsigned char) ((*strToAppend>>8)-0x80);
00632             args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (unsigned char) ((*strToAppend & 0x00FF)-0x80);
00633             *err = U_BUFFER_OVERFLOW_ERROR;
00638         }
00639     }
00640     
00641 }
00642 
00643 U_CFUNC void UConverter_fromUnicode_HZ(UConverterFromUnicodeArgs *args, UErrorCode *err){
00644 
00645 
00646     const UChar *mySource = args->source;
00647     unsigned char *myTarget = (unsigned char *) args->target;
00648     int32_t mySourceIndex = 0;
00649     int32_t myTargetIndex = 0;
00650     uint32_t targetValue=0;
00651     int32_t targetLength = args->targetLimit - args->target;
00652     int32_t sourceLength = args->sourceLimit - args->source;
00653     int32_t length=0;
00654     uint32_t targetUniChar = 0x0000;
00655     UChar32 mySourceChar = 0x0000,c=0x0000;
00656     UConverterDataHZ *myConverterData=(UConverterDataHZ*)args->converter->extraInfo;
00657     UBool isTargetUCharDBCS = (UBool) myConverterData->isTargetUCharDBCS;
00658     UBool oldIsTargetUCharDBCS = isTargetUCharDBCS;
00659     UConverterCallbackReason reason;
00660     UBool isEscapeAppended =FALSE;
00661     
00662     /*Arguments Check*/
00663     if (U_FAILURE(*err)) 
00664         return;
00665     
00666     if ((args->converter == NULL) || (args->targetLimit < args->target) || (args->sourceLimit < args->source)){
00667         *err = U_ILLEGAL_ARGUMENT_ERROR;
00668         return;
00669     }
00670     if(args->converter->fromUSurrogateLead!=0 && myTargetIndex < targetLength) {
00671         goto getTrail;
00672     }
00673     /*writing the char to the output stream */
00674     while (mySourceIndex < sourceLength){
00675         
00676         if (myTargetIndex < targetLength){
00677             
00678             c=mySourceChar = (UChar) args->source[mySourceIndex++];
00679             
00680             /*Handle surrogates */
00681             if(UTF_IS_SURROGATE(mySourceChar)) {
00682                 if(UTF_IS_SURROGATE_FIRST(mySourceChar)) {
00683                     args->converter->fromUSurrogateLead = (UChar)mySourceChar;
00684 getTrail:
00685                     /*look ahead to find the trail surrogate*/
00686                     if(mySourceIndex <  sourceLength) {
00687                         /* test the following code unit */
00688                         UChar trail=(UChar) args->source[mySourceIndex];
00689                         if(UTF_IS_SECOND_SURROGATE(trail)) {
00690                             ++mySourceIndex;
00691                             mySourceChar=UTF16_GET_PAIR_VALUE(mySourceChar, trail);
00692                             args->converter->fromUSurrogateLead = 0x00;
00693                             /* convert this surrogate code point */
00694                             /* exit this condition tree */
00695                         } else {
00696                             /* this is an unmatched lead code unit (1st surrogate) */
00697                             /* callback(illegal) */
00698                             reason=UCNV_ILLEGAL;
00699                             *err=U_ILLEGAL_CHAR_FOUND;
00700                             goto CALLBACK;
00701                         }
00702                     } else {
00703                         /* no more input */
00704                         break;
00705                     }
00706                 } else {
00707                     /* this is an unmatched trail code unit (2nd surrogate) */
00708                     /* callback(illegal) */
00709                     reason=UCNV_ILLEGAL;
00710                     *err=U_ILLEGAL_CHAR_FOUND;
00711                     goto CALLBACK;
00712                 }
00713             }
00714             oldIsTargetUCharDBCS = isTargetUCharDBCS;
00715             if(mySourceChar == 0x7E){
00716                 concatEscape(args, &myTargetIndex, &targetLength,"\x7E\x7E",err,2,&mySourceIndex);
00717                 continue;
00718             }
00719             else{
00720                 length= _MBCSFromUChar32(myConverterData->gbConverter->sharedData,
00721                     mySourceChar,&targetValue,args->converter->useFallback);
00722                 targetUniChar = (UChar32) targetValue;
00723 
00724             }
00725             /* only DBCS or SBCS characters are expected*/
00726             if(length > 2 || length==0){
00727                 reason =UCNV_ILLEGAL;
00728                 *err =U_INVALID_CHAR_FOUND;
00729                 goto CALLBACK;
00730             }
00731             /* DB characters with high bit set to 1 are expected */
00732             if(((targetUniChar & 0x8080) != 0x8080)&& length==2){
00733                 reason =UCNV_ILLEGAL;
00734                 *err =U_INVALID_CHAR_FOUND;
00735                 goto CALLBACK;
00736             }
00737             
00738             myConverterData->isTargetUCharDBCS = isTargetUCharDBCS = (UBool)(targetUniChar>0x00FF);
00739             
00740             if (targetUniChar != missingCharMarker){
00741                     
00742                  if(oldIsTargetUCharDBCS != isTargetUCharDBCS || !myConverterData->isEscapeAppended ){
00743                     /*Shifting from a double byte to single byte mode*/
00744                     if(!isTargetUCharDBCS){
00745                         concatEscape(args, &myTargetIndex, &targetLength, SB_ESCAPE,err, 2,&mySourceIndex);
00746                         myConverterData->isEscapeAppended =isEscapeAppended =TRUE;
00747                     }
00748                     else{ /* Shifting from a single byte to double byte mode*/
00749                         concatEscape(args, &myTargetIndex, &targetLength, DB_ESCAPE,err, 2,&mySourceIndex);
00750                         myConverterData->isEscapeAppended =isEscapeAppended =TRUE;
00751                         
00752                     }
00753                 }
00754             
00755                 concatString(args, &myTargetIndex, &targetLength,&targetUniChar,err, &mySourceIndex);
00756 
00757                 TEST_ERROR_CONDITION(args,myTargetIndex, mySourceIndex, isTargetUCharDBCS,myConverterData, err);
00758             }
00759             else{
00760                 
00761 CALLBACK:
00762                     {
00763                         const   UChar* saveSource = args->source;
00764                         char*   saveTarget = args->target;
00765                         int32_t *saveOffsets = args->offsets;
00766                         *err = U_INVALID_CHAR_FOUND;
00767                         args->converter->invalidUCharBuffer[0] = (UChar) mySourceChar;
00768                         args->converter->invalidUCharLength = 1;
00769                 
00770                         myConverterData->isTargetUCharDBCS = isTargetUCharDBCS;
00771                         args->target += myTargetIndex;
00772                         args->source += mySourceIndex;
00773                         FromU_CALLBACK_MACRO(args->converter->fromUContext,
00774                             args,
00775                             args->converter->invalidUCharBuffer,
00776                             1,
00777                             (UChar32) mySourceChar,
00778                             UCNV_UNASSIGNED,
00779                             err);
00780                         args->source = saveSource;
00781                         args->target = saveTarget;
00782                         args->offsets = saveOffsets;
00783                         isTargetUCharDBCS=(UBool)myConverterData->isTargetUCharDBCS;
00784                         args->converter->fromUSurrogateLead =0x00;
00785                         myConverterData->isEscapeAppended =isEscapeAppended =FALSE;
00786                         args->converter->invalidUCharLength = 0;
00787                         if (U_FAILURE (*err)) 
00788                             break;
00789                         
00790                     }
00791             }
00792         }
00793         else{
00794             *err = U_BUFFER_OVERFLOW_ERROR;
00795             break;
00796         }
00797         targetUniChar=missingCharMarker;
00798     }
00799     /* If at the end of conversion we are still carrying state information
00800      * and flush is TRUE, we can deduce that the input stream is truncated
00801      */
00802     if (args->converter->fromUSurrogateLead !=0 && (mySourceIndex == sourceLength) && args->flush){
00803         if (U_SUCCESS(*err)){
00804             *err = U_TRUNCATED_CHAR_FOUND;
00805             args->converter->toUnicodeStatus = 0x00;
00806         }
00807     }
00808     /* Reset the state of converter if we consumed 
00809      * the source and flush is true
00810      */
00811     if( (mySourceIndex == sourceLength) && args->flush){
00812         _HZReset(args->converter);
00813     }
00814 
00815     args->target += myTargetIndex;
00816     args->source += mySourceIndex;
00817     myConverterData->isTargetUCharDBCS = isTargetUCharDBCS;
00818     
00819     return;
00820 }
00821 
00822 
00823 U_CFUNC void UConverter_fromUnicode_HZ_OFFSETS_LOGIC (UConverterFromUnicodeArgs * args,
00824                                                       UErrorCode * err){
00825     const UChar *mySource = args->source;
00826     unsigned char *myTarget = (unsigned char *) args->target;
00827     int32_t mySourceIndex = 0;
00828     int32_t myTargetIndex = 0;
00829     int32_t targetLength = args->targetLimit - args->target;
00830     int32_t sourceLength = args->sourceLimit - args->source;
00831     int32_t length=0;
00832     uint32_t targetUniChar = 0x0000;
00833     UChar32 mySourceChar = 0x0000,c=0x0000;
00834     UConverterDataHZ *myConverterData=(UConverterDataHZ*)args->converter->extraInfo;
00835     UBool isTargetUCharDBCS = (UBool) myConverterData->isTargetUCharDBCS;
00836     UBool oldIsTargetUCharDBCS = isTargetUCharDBCS;
00837     UConverterCallbackReason reason;
00838     UBool isEscapeAppended =FALSE;
00839     
00840     /*Arguments Check*/
00841     if (U_FAILURE(*err)) 
00842         return;
00843     
00844     if ((args->converter == NULL) || (args->targetLimit < args->target) || (args->sourceLimit < args->source)){
00845         *err = U_ILLEGAL_ARGUMENT_ERROR;
00846         return;
00847     }
00848     if(args->converter->fromUSurrogateLead!=0 && myTargetIndex < targetLength) {
00849         goto getTrail;
00850     }
00851     /*writing the char to the output stream */
00852     while (mySourceIndex < sourceLength){
00853         
00854         if (myTargetIndex < targetLength){
00855             
00856             c=mySourceChar = (UChar) args->source[mySourceIndex++];
00857             
00858             /*Handle surrogates */
00859             if(UTF_IS_SURROGATE(mySourceChar)) {
00860                 if(UTF_IS_SURROGATE_FIRST(mySourceChar)) {
00861                     args->converter->fromUSurrogateLead = (UChar) mySourceChar;
00862 getTrail:
00863                     /*look ahead to find the trail surrogate*/
00864                     if(mySourceIndex <  sourceLength) {
00865                         /* test the following code unit */
00866                         UChar trail=(UChar) args->source[mySourceIndex];
00867                         if(UTF_IS_SECOND_SURROGATE(trail)) {
00868                             ++mySourceIndex;
00869                             mySourceChar=UTF16_GET_PAIR_VALUE(mySourceChar, trail);
00870                             args->converter->fromUSurrogateLead = 0x00;
00871                             /* convert this surrogate code point */
00872                             /* exit this condition tree */
00873                         } else {
00874                             /* this is an unmatched lead code unit (1st surrogate) */
00875                             /* callback(illegal) */
00876                             reason=UCNV_ILLEGAL;
00877                             *err=U_ILLEGAL_CHAR_FOUND;
00878                             goto CALLBACK;
00879                         }
00880                     } else {
00881                         /* no more input */
00882                         break;
00883                     }
00884                 } else {
00885                     /* this is an unmatched trail code unit (2nd surrogate) */
00886                     /* callback(illegal) */
00887                     reason=UCNV_ILLEGAL;
00888                     *err=U_ILLEGAL_CHAR_FOUND;
00889                     goto CALLBACK;
00890                 }
00891             }
00892             oldIsTargetUCharDBCS = isTargetUCharDBCS;
00893             if(mySourceChar == 0x7E){
00894                 concatEscape(args, &myTargetIndex, &targetLength,"\x7E\x7E",err,2,&mySourceIndex);
00895                 continue;
00896             }
00897             else{
00898                 length= _MBCSFromUChar32(myConverterData->gbConverter->sharedData,
00899                     mySourceChar,&targetUniChar,args->converter->useFallback);
00900 
00901             }
00902             /* only DBCS or SBCS characters are expected*/
00903             if(length > 2 || length==0){
00904                 reason =UCNV_ILLEGAL;
00905                 *err =U_INVALID_CHAR_FOUND;
00906                 goto CALLBACK;
00907             }
00908             /* DB haracters with high bit set to 1 are expected */
00909             if(((targetUniChar & 0x8080) != 0x8080)&& length==2){
00910                 reason =UCNV_ILLEGAL;
00911                 *err =U_INVALID_CHAR_FOUND;
00912                 goto CALLBACK;
00913             }
00914             
00915             myConverterData->isTargetUCharDBCS = isTargetUCharDBCS = (UBool)(targetUniChar>0x00FF);
00916             
00917             if (targetUniChar != missingCharMarker){
00918                     
00919                  if(oldIsTargetUCharDBCS != isTargetUCharDBCS || !myConverterData->isEscapeAppended ){
00920                     /*Shifting from a double byte to single byte mode*/
00921                     if(!isTargetUCharDBCS){
00922                         concatEscape(args, &myTargetIndex, &targetLength, SB_ESCAPE,err, 2,&mySourceIndex);
00923                         myConverterData->isEscapeAppended =isEscapeAppended =TRUE;
00924                     }
00925                     else{ /* Shifting from a single byte to double byte mode*/
00926                         concatEscape(args, &myTargetIndex, &targetLength, DB_ESCAPE,err, 2,&mySourceIndex);
00927                         myConverterData->isEscapeAppended =isEscapeAppended =TRUE;
00928                         
00929                     }
00930                 }
00931             
00932                 concatString(args, &myTargetIndex, &targetLength,&targetUniChar,err, &mySourceIndex);
00933 
00934                 TEST_ERROR_CONDITION(args,myTargetIndex, mySourceIndex, isTargetUCharDBCS,myConverterData, err);
00935             }
00936             else{
00937                 
00938 CALLBACK:
00939                 {
00940                     int32_t currentOffset = args->offsets[myTargetIndex-1]+1;
00941                     char * saveTarget = args->target;
00942                     const UChar* saveSource = args->source;
00943                     int32_t *saveOffsets = args->offsets;
00944                     *err = U_INVALID_CHAR_FOUND;
00945                     args->converter->invalidUCharBuffer[0] = (UChar) mySourceChar;
00946                     args->converter->invalidUCharLength = 1;
00947                 
00948                     /* Breaks out of the loop since behaviour was set to stop */
00949                     args->converter->fromUnicodeStatus = (int32_t)isTargetUCharDBCS;
00950                     args->target +=     myTargetIndex;
00951                     args->source +=     mySourceIndex;
00952                     args->offsets =     args->offsets?args->offsets+myTargetIndex:0;
00953                     FromU_CALLBACK_OFFSETS_LOGIC_MACRO(args->converter->fromUContext,
00954                         args,
00955                         args->converter->invalidUCharBuffer,
00956                         1,
00957                         (UChar32)mySourceChar,
00958                         UCNV_UNASSIGNED,
00959                         err);
00960                     args->source = saveSource;
00961                     args->target = saveTarget;
00962                     args->offsets =     saveOffsets;
00963                     isTargetUCharDBCS=(UBool)myConverterData->isTargetUCharDBCS;
00964                     args->converter->fromUSurrogateLead =0x00;
00965                     myConverterData->isEscapeAppended =isEscapeAppended =FALSE;
00966                     args->converter->invalidUCharLength = 0;
00967                     if (U_FAILURE (*err))     
00968                         break;
00969                     
00970                 }
00971             }
00972         }
00973         else{
00974             *err = U_BUFFER_OVERFLOW_ERROR;
00975             break;
00976         }
00977         targetUniChar=missingCharMarker;
00978     }
00979     /*If at the end of conversion we are still carrying state information
00980      *flush is TRUE, we can deduce that the input stream is truncated
00981      */
00982     if (args->converter->fromUSurrogateLead !=0 && (mySourceIndex == sourceLength) && args->flush){
00983         if (U_SUCCESS(*err)){
00984             *err = U_TRUNCATED_CHAR_FOUND;
00985             args->converter->toUnicodeStatus = 0x00;
00986         }
00987     }
00988     /* Reset the state of converter if we consumed 
00989      * the source and flush is true
00990      */
00991     if( (mySourceIndex == sourceLength) && args->flush){
00992         _HZReset(args->converter);
00993     }
00994 
00995     args->target += myTargetIndex;
00996     args->source += mySourceIndex;
00997     myConverterData->isTargetUCharDBCS = isTargetUCharDBCS;
00998     
00999     return;
01000 }
01001 
01002 U_CFUNC UChar32 UConverter_getNextUChar_HZ (UConverterToUnicodeArgs * pArgs,
01003                                             UErrorCode *pErrorCode){
01004     UChar buffer[UTF_MAX_CHAR_LENGTH];
01005     const char *realLimit=pArgs->sourceLimit;
01006     
01007     pArgs->target=buffer;
01008     pArgs->targetLimit=buffer+UTF_MAX_CHAR_LENGTH;
01009     
01010     while(pArgs->source<realLimit) {
01011         /* feed in one byte at a time to make sure to get only one character out */
01012         pArgs->sourceLimit=pArgs->source+1;
01013         pArgs->flush= (UBool)(pArgs->sourceLimit==realLimit);
01014         UConverter_toUnicode_HZ(pArgs, pErrorCode);
01015         if(U_FAILURE(*pErrorCode) && *pErrorCode!=U_BUFFER_OVERFLOW_ERROR) {
01016             return 0xffff;
01017         } else if(pArgs->target!=buffer) {
01018             if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
01019                 *pErrorCode=U_ZERO_ERROR;
01020             }
01021             return ucnv_getUChar32KeepOverflow(pArgs->converter, buffer, pArgs->target-buffer);
01022         }
01023     }
01024     
01025     /* no output because of empty input or only state changes and skipping callbacks */
01026     *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
01027     return 0xffff;
01028 }

Generated at Tue Dec 5 10:48:03 2000 for ICU by doxygen 1.2.3, written by Dimitri van Heesch, © 1997-2000