Main Page   Class Hierarchy   Alphabetical List   Compound List   File List   Compound Members  

ucnv_err.c

00001 /*
00002  *******************************************************************************
00003  *
00004  *   Copyright (C) 1998-1999, International Business Machines
00005  *   Corporation and others.  All Rights Reserved.
00006  *
00007  *******************************************************************************
00008  *
00009  *  ucnv_err.c
00010  *  Implements error behaviour functions called by T_UConverter_{from,to}Unicode
00011  *
00012  *
00013 *   Change history:
00014 *
00015 *   06/29/2000  helena      Major rewrite of the callback APIs.
00016 */
00017 
00018 #include "ucmp8.h"
00019 #include "ucmp16.h"
00020 #include "unicode/ucnv_err.h"
00021 #include "unicode/ucnv_cb.h"
00022 #include "ucnv_cnv.h"
00023 #include "cmemory.h"
00024 #include "unicode/ucnv.h"
00025 
/* Size, in UChars, of the scratch buffers used by the escape callbacks:
 * 32 = 4 (UChars per escaped byte, "%XHH") * 8 (max number of bytes per
 * char for any converter).  Note that itou() also writes a terminating NUL
 * after the digits, which this count does not include.
 */
#define VALUE_STRING_LENGTH 32
#define UNICODE_PERCENT_SIGN_CODEPOINT 0x0025
#define UNICODE_U_CODEPOINT 0x0055
#define UNICODE_X_CODEPOINT 0x0058


/* Maps a digit value to the code point of its character: 0..9 give
 * U+0030..U+0039 ('0'..'9'), 10 and above give U+0041 ('A') onwards.
 * The argument and the whole expansion are parenthesized so the macro can be
 * used with arbitrary expressions and inside larger expressions.
 * The argument is evaluated more than once: do not pass expressions with
 * side effects.
 */
#define ToOffset(a) ((a)<=9?(0x0030+(a)):(0x0030+(a)+7))
00034 
00035 UBool 
00036   CONVERSION_U_SUCCESS (UErrorCode err)
00037 {
00038   if ((err == U_INVALID_CHAR_FOUND) || (err == U_ILLEGAL_CHAR_FOUND))    return FALSE;
00039   else    return TRUE;
00040 }
00041 
00042 /*Takes a int32_t and fills in  a UChar* string with that number "radix"-based
00043  * and padded with "pad" zeroes
00044  */
00045 static void   itou (UChar * buffer, uint32_t i, uint32_t radix, int32_t pad)
00046 {
00047   int32_t length = 0;
00048   int32_t num = 0;
00049   int8_t digit;
00050   int32_t j;
00051   UChar temp;
00052 
00053   while (i >= radix)
00054     {
00055       num = i / radix;
00056       digit = (int8_t) (i - num * radix);
00057       buffer[length++] = (UChar) (ToOffset (digit));
00058       i = num;
00059     }
00060 
00061   buffer[length] = (UChar) (ToOffset (i));
00062 
00063   while (length < pad)   buffer[++length] = (UChar) 0x0030;     /*zero padding */
00064   buffer[length--] = (UChar) 0x0000;
00065   
00066   /*Reverses the string */
00067   for (j = 0; j < (pad / 2); j++)
00068     {
00069       temp = buffer[length - j];
00070       buffer[length - j] = buffer[j];
00071       buffer[j] = temp;
00072     }
00073 
00074   return;
00075 }
00076 
00077 /*Function Pointer STOPS at the ILLEGAL_SEQUENCE */
00078 void   UCNV_FROM_U_CALLBACK_STOP (
00079                   void *context,
00080                   UConverterFromUnicodeArgs *fromUArgs,
00081                   const UChar* codeUnits,
00082                   int32_t length,
00083                   UChar32 codePoint,
00084                   UConverterCallbackReason reason,
00085                   UErrorCode * err)
00086 {
00087   /* the caller must have set the error code accordingly */
00088   return;
00089 }
00090 
00091 
00092 /*Function Pointer STOPS at the ILLEGAL_SEQUENCE */
00093 void   UCNV_TO_U_CALLBACK_STOP (
00094                    void *context,
00095                    UConverterToUnicodeArgs *toUArgs,
00096                    const char* codePoints,
00097                    int32_t length,
00098                    UConverterCallbackReason reason,
00099                    UErrorCode * err)
00100 {
00101   /* the caller must have set the error code accordingly */
00102   return;
00103 }
00104 
00105 void   UCNV_FROM_U_CALLBACK_SKIP (                  
00106                   void *context,
00107                   UConverterFromUnicodeArgs *fromUArgs,
00108                   const UChar* codeUnits,
00109                   int32_t length,
00110                   UChar32 codePoint,
00111                   UConverterCallbackReason reason,
00112                   UErrorCode * err)
00113 {
00114   if (reason <= UCNV_IRREGULAR)
00115   {
00116     *err = U_ZERO_ERROR;
00117   }
00118 }
00119 
00120 void   UCNV_FROM_U_CALLBACK_SUBSTITUTE (
00121                   void *context,
00122                   UConverterFromUnicodeArgs *fromArgs,
00123                   const UChar* codeUnits,
00124                   int32_t length,
00125                   UChar32 codePoint,
00126                   UConverterCallbackReason reason,
00127                   UErrorCode * err)
00128 {
00129   if (reason > UCNV_IRREGULAR)
00130   {
00131     return;
00132   }
00133   
00134   *err = U_ZERO_ERROR;
00135   
00136   ucnv_cbFromUWriteSub(fromArgs, 0, err);
00137 }
00138 
00139 /*uses itou to get a unicode escape sequence of the offensive sequence,
00140  *uses a clean copy (resetted) of the converter, to convert that unicode
00141  *escape sequence to the target codepage (if conversion failure happens then
00142  *we revert to substituting with subchar)
00143  */
00144 void   UCNV_FROM_U_CALLBACK_ESCAPE (
00145                          void *context,
00146                          UConverterFromUnicodeArgs *fromArgs,
00147                          const UChar *codeUnits,
00148                          int32_t length,
00149                          UChar32 codePoint,
00150                          UConverterCallbackReason reason,
00151                          UErrorCode * err)
00152 {
00153 
00154   UChar valueString[VALUE_STRING_LENGTH];
00155   int32_t valueStringLength = 0;
00156   int32_t i = 0;
00157 
00158   const UChar *myValueSource = NULL;
00159   UErrorCode err2 = U_ZERO_ERROR;
00160   UConverterFromUCallback original = NULL;
00161   void *originalContext;
00162 
00163   UConverterFromUCallback ignoredCallback = NULL;
00164   void *ignoredContext;
00165 
00166   if (reason > UCNV_IRREGULAR)
00167   {
00168     return;
00169   }
00170 
00171   ucnv_setFromUCallBack (fromArgs->converter,
00172              (UConverterFromUCallback) UCNV_FROM_U_CALLBACK_SUBSTITUTE,
00173              NULL,  /* To Do for HSYS: context is null? */
00174              &original,
00175              &originalContext,
00176              &err2);
00177   if (U_FAILURE (err2))
00178   {
00179     *err = err2;
00180     return;
00181   }
00182   
00183   /*
00184    * ### TODO:
00185    * This should actually really work with the codePoint, not with the codeUnits;
00186    * how do we represent a code point > 0xffff? It should be one single escape, not
00187    * two for a surrogate pair!
00188    */
00189   while (i < length)
00190   {
00191     valueString[valueStringLength++] = (UChar) UNICODE_PERCENT_SIGN_CODEPOINT;  /* adding % */
00192     valueString[valueStringLength++] = (UChar) UNICODE_U_CODEPOINT;     /* adding U */
00193     itou (valueString + valueStringLength, codeUnits[i++], 16, 4);
00194     valueStringLength += 4;
00195   }
00196 
00197   myValueSource = valueString;
00198 
00199   /* reset the error */
00200   *err = U_ZERO_ERROR;
00201 
00202   ucnv_cbFromUWriteUChars(fromArgs, &myValueSource, myValueSource+valueStringLength, 0, err);
00203 
00204   ucnv_setFromUCallBack (fromArgs->converter,
00205                          original,
00206                          originalContext,
00207                          &ignoredCallback,
00208                          &ignoredContext,
00209                          &err2);
00210   if (U_FAILURE (err2))
00211     {
00212       *err = err2;
00213       return;
00214     }
00215 
00216   return;
00217 }
00218 
00219 
00220 
00221 void UCNV_TO_U_CALLBACK_SKIP (
00222                  void *context,
00223                  UConverterToUnicodeArgs *toArgs,
00224                  const char* codeUnits,
00225                  int32_t length,
00226                  UConverterCallbackReason reason,
00227                  UErrorCode * err)
00228 {
00229     if (reason <= UCNV_IRREGULAR)
00230     {
00231         *err = U_ZERO_ERROR;
00232     }
00233 }
00234 
00235 void   UCNV_TO_U_CALLBACK_SUBSTITUTE (
00236                  void *context,
00237                  UConverterToUnicodeArgs *toArgs,
00238                  const char* codeUnits,
00239                  int32_t length,
00240                  UConverterCallbackReason reason,
00241                  UErrorCode * err)
00242 {
00243     if (reason > UCNV_IRREGULAR)
00244     {
00245         return;
00246     }
00247     
00248     *err = U_ZERO_ERROR;
00249     ucnv_cbToUWriteSub(toArgs,0,err);
00250 
00251     return;
00252 }
00253 
00254 /*uses itou to get a unicode escape sequence of the offensive sequence,
00255  *and uses that as the substitution sequence
00256  */
00257 void  UCNV_TO_U_CALLBACK_ESCAPE (
00258                  void *context,
00259                  UConverterToUnicodeArgs *toArgs,
00260                  const char* codeUnits,
00261                  int32_t length,
00262                  UConverterCallbackReason reason,
00263                  UErrorCode * err)
00264 {
00265   UChar uniValueString[VALUE_STRING_LENGTH];
00266   int32_t valueStringLength = 0;
00267   int32_t i = 0;
00268   
00269   if (reason > UCNV_IRREGULAR)
00270   {
00271     return;
00272   }
00273 
00274   /* ### TODO:
00275    * This should use the new ucnv_cbWrite...() functions instead of doing
00276    * "tricks" as before we had a good callback API!
00277    * (Actually, this function is not all that bad.)
00278    */
00279 
00280   while (i < length)
00281     {
00282       uniValueString[valueStringLength++] = (UChar) UNICODE_PERCENT_SIGN_CODEPOINT;     /* adding % */
00283       uniValueString[valueStringLength++] = (UChar) UNICODE_X_CODEPOINT;        /* adding X */
00284       itou (uniValueString + valueStringLength, (uint8_t) codeUnits[i++], 16, 2);
00285       valueStringLength += 2;
00286     }
00287 
00288 
00289   /* reset the error */
00290   *err = U_ZERO_ERROR;
00291   
00292   ucnv_cbToUWriteUChars(toArgs, uniValueString, valueStringLength, 0, err);
00293 
00294   return;
00295 }

Generated at Tue Dec 5 10:48:01 2000 for ICU by doxygen 1.2.3 written by Dimitri van Heesch, © 1997-2000