Main Page   Class Hierarchy   Alphabetical List   Compound List   File List   Compound Members  

ucnv_cb.c

00001 /*
00002 **********************************************************************
00003 *   Copyright (C) 2000, International Business Machines
00004 *   Corporation and others.  All Rights Reserved.
00005 **********************************************************************
00006  *  ucnv_cb.c:
00007  *  External APIs for the ICU's codeset conversion library
00008  *  Helena Shih
00009  * 
00010  * Modification History:
00011  *
00012  *   Date        Name        Description 
00013  *   7/28/2000   srl         Implementation
00014  */
00015 
00021 #include "unicode/utypes.h"
00022 #include "unicode/ucnv_cb.h"
00023 #include "ucnv_bld.h"
00024 #include "cmemory.h"
00025 
00026 /* need to update the offsets when the target moves. */
00027 /* Note: Recursion may occur in the cb functions, be sure to update the offsets correctly
00028 if you don't use ucnv_cbXXX functions.  Make sure you don't use the same callback within
00029 the same call stack if the complexity arises. */
00030 void ucnv_cbFromUWriteBytes (UConverterFromUnicodeArgs *args,
00031                        const char* source,
00032                        int32_t length,
00033                        int32_t offsetIndex,
00034                        UErrorCode * err)
00035 {
00036   int32_t togo;
00037   int8_t toerr;
00038   int32_t i;
00039 
00040   if((args->targetLimit - args->target) >= length) /* If the buffer fits.. */
00041   {
00042     uprv_memcpy(args->target, source, length);
00043     args->target += length;
00044     if(args->offsets) /* set all the offsets to the same # */
00045     {
00046       for(i=0;i<length;i++)
00047       {
00048         *(args->offsets++) = offsetIndex;
00049       }
00050     }
00051   }
00052   else
00053   {
00054     togo = args->targetLimit - args->target;
00055 
00056     uprv_memcpy(args->target, source, togo);
00057     args->target += togo;
00058     
00059     if(args->offsets)
00060     {
00061       for(i=0;i<togo;i++)
00062       {
00063         *(args->offsets++) = offsetIndex;
00064       }
00065     }
00066 
00067     /* Now, copy the remainder into the errbuff */
00068     source += togo;
00069     toerr = (int8_t)(length - togo);
00070     
00071     uprv_memcpy(args->converter->charErrorBuffer + 
00072                 args->converter->charErrorBufferLength, 
00073                 source,
00074                 toerr * sizeof(source[0]));
00075     args->converter->charErrorBufferLength += toerr;
00076 
00077     *err = U_BUFFER_OVERFLOW_ERROR;
00078 
00079   }
00080 }
00081 
00082 void ucnv_cbFromUWriteUChars(UConverterFromUnicodeArgs *args,
00083                              const UChar** source,
00084                              const UChar*  sourceLimit,
00085                              int32_t offsetIndex,
00086                              UErrorCode * err)
00087 {
00088   /*
00089     This is a fun one.  Recursion can occur - we're basically going to 
00090     just retry shoving data through the same converter. Note, if you got 
00091     here through some kind of invalid sequence, you maybe should emit a 
00092     reset sequence of some kind and/or call ucnv_reset().  Since this
00093     IS an actual conversion, take care that you've changed the callback
00094     or the data, or you'll get an infinite loop.
00095     
00096     Please set the err value to something reasonable before calling
00097     into this.
00098   */
00099 
00100   char *oldTarget;
00101 
00102   if(U_FAILURE(*err))
00103   {
00104     return;
00105   }
00106 
00107   oldTarget = args->target;
00108 
00109   ucnv_fromUnicode(args->converter,
00110                    &args->target,
00111                    args->targetLimit,
00112                    source,
00113                    sourceLimit,
00114                    NULL, /* no offsets */
00115                    FALSE, /* no flush */
00116                    err);
00117 
00118   if(args->offsets) 
00119   {
00120     while (args->target != oldTarget)  /* if it moved at all.. */
00121     {
00122       *(args->offsets)++ = offsetIndex;
00123       oldTarget++;
00124     }
00125   }
00126 
00127   /* Note, if you did something like used a Stop subcallback, things would get interesting.  
00128      In fact, here's where we want to return the partially consumed in-source! 
00129   */
00130   if(*err == U_BUFFER_OVERFLOW_ERROR)
00131          /* && (*source < sourceLimit && args->target >= args->targetLimit) 
00132                     -- S. Hrcek */
00133   {
00134     /* Overflowed the target.  Now, we'll write into the charErrorBuffer.
00135        It's a fixed size. If we overflow it... Hmm */
00136     char *newTarget;
00137     const char *newTargetLimit;
00138     UErrorCode err2 = U_ZERO_ERROR;
00139 
00140     int8_t errBuffLen;
00141 
00142     errBuffLen  = args->converter->charErrorBufferLength;
00143 
00144     /* start the new target at the first free slot in the errbuff.. */
00145     newTarget = (char *)(args->converter->charErrorBuffer + errBuffLen);
00146       
00147     newTargetLimit = (char *)(args->converter->charErrorBuffer +
00148       sizeof(args->converter->charErrorBuffer));
00149 
00150     if(newTarget >= newTargetLimit) 
00151     {
00152       *err = U_INTERNAL_PROGRAM_ERROR;
00153       return;
00154     }
00155 
00156     /* We're going to tell the converter that the errbuff len is empty.
00157        This prevents the existing errbuff from being 'flushed' out onto
00158        itself.  If the errbuff is needed by the converter this time, 
00159        we're hosed - we're out of space! */
00160 
00161     args->converter->charErrorBufferLength = 0;
00162     
00163     ucnv_fromUnicode(args->converter,
00164                      &newTarget, 
00165                      newTargetLimit,
00166                      source,
00167                      sourceLimit,
00168                      NULL,
00169                      FALSE,
00170                      &err2);
00171 
00172     /* We can go ahead and overwrite the  length here. We know just how
00173        to recalculate it. */
00174 
00175     args->converter->charErrorBufferLength = (int8_t)(
00176       newTarget - (char*)args->converter->charErrorBuffer);
00177 
00178     if((newTarget >= newTargetLimit) || (err2 == U_BUFFER_OVERFLOW_ERROR))
00179     {
00180       /* now we're REALLY in trouble.
00181          Internal program error - callback oughtn't to have written this much
00182          data!
00183       */
00184       *err = U_INTERNAL_PROGRAM_ERROR;
00185       return;
00186     }
00187     else
00188     {
00189       /* sub errs could be invalid/truncated/illegal chars or w/e.
00190          These might want to be passed on up.. But the problem is, we already
00191          need to pass U_BUFFER_OVERFLOW_ERROR. That has to override these 
00192          other errs.. */
00193 
00194       /*
00195           if(U_FAILURE(err2))
00196                 ??
00197       */
00198     }
00199   }
00200 }
00201 
00202 void ucnv_cbFromUWriteSub (UConverterFromUnicodeArgs *args,
00203                            int32_t offsetIndex,
00204                        UErrorCode * err)
00205 {
00206     char togo[5];
00207     int32_t togoLen;
00208 
00209     if(U_FAILURE(*err))
00210     {
00211       return;
00212     }
00213 
00214     /*In case we're dealing with a modal converter a la UCNV_EBCDIC_STATEFUL,
00215     we need to make sure that the emitting of the substitution charater in the right mode*/
00216     uprv_memcpy(togo, args->converter->subChar, togoLen = args->converter->subCharLen);
00217     if (ucnv_getType(args->converter) == UCNV_EBCDIC_STATEFUL)
00218     {
00219         if ((args->converter->fromUnicodeStatus)&&(togoLen != 2))
00220         {
00221             togo[0] = UCNV_SI;
00222             togo[1] = args->converter->subChar[0];
00223             togo[2] = UCNV_SO;
00224             togoLen = 3;
00225         }
00226         else if (!(args->converter->fromUnicodeStatus)&&(togoLen != 1))
00227         {
00228             togo[0] = UCNV_SO;
00229             togo[1] = args->converter->subChar[0];
00230             togo[2] = args->converter->subChar[1];
00231             togo[3] = UCNV_SI;
00232             togoLen = 4;
00233         }
00234     }
00235 
00236     /*if we have enough space on the output buffer we just copy
00237     the subchar there and update the pointer */  
00238     ucnv_cbFromUWriteBytes(args, togo, togoLen, offsetIndex, err);
00239 
00240 
00241     return;
00242 }
00243 
00244 void ucnv_cbToUWriteUChars (UConverterToUnicodeArgs *args,
00245                             const UChar* source,
00246                             int32_t length,
00247                             int32_t offsetIndex,
00248                             UErrorCode * err)
00249 {
00250   int32_t togo;
00251   int8_t toerr;
00252   int32_t i;
00253   
00254   if(U_FAILURE(*err))
00255   {
00256     return;
00257   }
00258 
00259 
00260   if((args->targetLimit - args->target) >= length) /* If the buffer fits.. */
00261   {
00262     uprv_memcpy(args->target, source, length * sizeof(args->target[0]) );
00263     args->target += length;
00264     if(args->offsets) /* set all the offsets to the same # */
00265     {
00266       for(i=0;i<length;i++)
00267       {
00268         *(args->offsets++) = offsetIndex;
00269       }
00270     }
00271   }
00272   else
00273   {
00274     togo = args->targetLimit - args->target;
00275 
00276     uprv_memcpy(args->target, source, togo * sizeof(args->target[0])  );
00277     args->target += togo;
00278     
00279     if(args->offsets) 
00280     {
00281       for(i=0;i<togo;i++)
00282       {
00283         *(args->offsets++) = offsetIndex;
00284       }
00285     }
00286 
00287     /* Now, copy the remainder into the errbuff */
00288     source += togo;
00289     toerr = (int8_t)(length - togo);
00290     
00291     uprv_memcpy(args->converter->UCharErrorBuffer + 
00292                 args->converter->UCharErrorBufferLength, 
00293                 source,
00294                 toerr * sizeof(source[0]));
00295     args->converter->UCharErrorBufferLength += toerr;
00296 
00297     *err = U_BUFFER_OVERFLOW_ERROR;
00298   }
00299 }
00300 
00301 void ucnv_cbToUWriteSub (UConverterToUnicodeArgs *args,
00302                          int32_t offsetIndex,
00303                        UErrorCode * err)
00304 {
00305   static const UChar kSubstituteChar  = 0xFFFD ;
00306 
00307   /* could optimize this case, just one uchar */
00308   ucnv_cbToUWriteUChars(args, &kSubstituteChar, 1, offsetIndex, err);
00309 }

Generated at Tue Dec 5 10:48:01 2000 for ICU by doxygen1.2.3 written by Dimitri van Heesch, © 1997-2000