Main Page   Class Hierarchy   Alphabetical List   Compound List   File List   Compound Members  

ucnv2022.c

00001 /*  
00002 **********************************************************************
00003 *   Copyright (C) 2000, International Business Machines
00004 *   Corporation and others.  All Rights Reserved.
00005 **********************************************************************
00006 *   file name:  ucnv2022.c
00007 *   encoding:   US-ASCII
00008 *   tab size:   8 (not used)
00009 *   indentation:4
00010 *
00011 *   created on: 2000feb03
00012 *   created by: Markus W. Scherer
00013 *
00014 *   Change history:
00015 *
00016 *   06/29/2000  helena  Major rewrite of the callback APIs.
00017 *   08/08/2000  Ram     Included support for ISO-2022-JP-2
00018 *                       Changed implementation of toUnicode
00019 *                       function
00020 *   08/21/2000  Ram     Added support for ISO-2022-KR
00021 *   08/29/2000  Ram     Separated implementation of EBCDIC to 
00022 *                       ucnvebdc.c
00023 *   09/20/2000  Ram     Added support for ISO-2022-CN
00024 *                       Added implementations for getNextUChar()
00025 *                       for specific 2022 country variants.
00026 *   10/31/2000  Ram     Implemented offsets logic functions 
00027 */
00028 
00029 #include "unicode/utypes.h"
00030 #include "cmemory.h"
00031 #include "ucmp16.h"
00032 #include "ucmp8.h"
00033 #include "unicode/ucnv_err.h"
00034 #include "ucnv_bld.h"
00035 #include "unicode/ucnv.h"
00036 #include "ucnv_cnv.h"
00037 #include "unicode/ustring.h"
00038 #include "cstring.h"
00039 
/*
 * Buffer-overflow bail-out for the fromUnicode loops.
 *
 * When *err is U_BUFFER_OVERFLOW_ERROR this macro:
 *   - advances args->target / args->source by the given indexes,
 *   - zeroes myConverterData->sourceIndex and ->targetIndex,
 *   - stores isTargetUCharDBCS in args->converter->fromUnicodeStatus,
 *   - and RETURNS from the enclosing (void) function.
 * Otherwise it does nothing.
 *
 * Every argument is parenthesized in the expansion so that expressions
 * (not only plain identifiers) can be passed safely.  The expansion is a
 * brace block, exactly as before, so existing call sites keep compiling
 * with or without a trailing semicolon.
 */
#define TEST_ERROR_CONDITION(args,myTargetIndex, mySourceIndex, isTargetUCharDBCS, myConverterData, err){ \
    if(*(err) == U_BUFFER_OVERFLOW_ERROR){ \
        /* save the state and return */ \
        (args)->target += (myTargetIndex); \
        (args)->source += (mySourceIndex); \
        (myConverterData)->sourceIndex = 0; \
        (myConverterData)->targetIndex = 0; \
        (args)->converter->fromUnicodeStatus = (isTargetUCharDBCS); \
        return; \
    } \
}
00051 
/*
 * Variant of the buffer-overflow bail-out that does not touch
 * fromUnicodeStatus.
 *
 * When *err is U_BUFFER_OVERFLOW_ERROR this macro advances args->target and
 * args->source by the given indexes, zeroes myConverterData->sourceIndex and
 * ->targetIndex, and RETURNS from the enclosing (void) function.
 *
 * Every argument is parenthesized in the expansion; the brace-block shape
 * of the expansion is unchanged so existing call sites keep compiling.
 */
#define TEST_ERROR_CONDITION_CN(args,myTargetIndex, mySourceIndex, myConverterData, err){ \
    if(*(err) == U_BUFFER_OVERFLOW_ERROR){ \
        /* save the state and return */ \
        (args)->target += (myTargetIndex); \
        (args)->source += (mySourceIndex); \
        (myConverterData)->sourceIndex = 0; \
        (myConverterData)->targetIndex = 0; \
        return; \
    } \
}
/* Single-shift sequences: ESC 'N' (SS2) and ESC 'O' (SS3). */
#define UCNV_SS2 "\x1B\x4E"
#define UCNV_SS3 "\x1B\x4F"

/*
 * The escape control character.  It is 0x1B (the same value as the first
 * byte of UCNV_SS2/UCNV_SS3 above); the previous value 0x0B is VT
 * (vertical tab) and was a typo.
 */
#define ESC 0x1B
00066 
00067 /* for ISO-2022JP implementation*/
/*
 * Character-set states used by the ISO-2022-JP implementation.
 * The first eight values coincide with the slots of myConverterArray that
 * _ISO2022Open() fills for the Japanese variant (ASCII in slot 0 ...
 * ksc_5601_1 in slot 7).
 */
typedef enum {
    ASCII         = 0,
    ISO8859_1     = 1,
    ISO8859_7     = 2,
    JISX201       = 3,
    JISX208       = 4,
    JISX212       = 5,
    GB2312        = 6,
    KSC5601       = 7,
    INVALID_STATE       /* implicitly 8: one past the last real state */
} StateEnum;
00080 
00081 
00082 
/* Kind of code page currently selected (stored in UConverterDataISO2022.currentType). */
typedef enum {
    ASCII1 = 0,
    LATIN1,     /* 1 */
    SBCS,       /* 2 */
    DBCS,       /* 3 */
    MBCS        /* 4 */
} Cnv2022Type;
00091 
00092 #define UCNV_OPTIONS_VERSION_MASK 0xf
00093 
00094 typedef struct{
00095     UConverter *currentConverter;
00096     UConverter *fromUnicodeConverter;
00097     UBool isFirstBuffer;
00098     StateEnum toUnicodeCurrentState;
00099     StateEnum fromUnicodeCurrentState;
00100     Cnv2022Type currentType;
00101     int plane;
00102     uint8_t escSeq2022[10];
00103     UConverter* myConverterArray[9];
00104     int32_t targetIndex;
00105     int32_t sourceIndex;
00106     UBool isEscapeAppended;
00107     UBool isShiftAppended;
00108     UBool isLocaleSpecified;
00109     uint32_t key;
00110     uint32_t version;
00111     char locale[3];
00112     char name[30];
00113 }UConverterDataISO2022;
00114 
00115 /* ISO-2022 ----------------------------------------------------------------- */
00116 
00117 /*Forward declaration */
00118 U_CFUNC void T_UConverter_fromUnicode_UTF8 (UConverterFromUnicodeArgs * args,
00119                                             UErrorCode * err);
00120 
00121 U_CFUNC void T_UConverter_fromUnicode_UTF8_OFFSETS_LOGIC (UConverterFromUnicodeArgs * args,
00122                                                           UErrorCode * err);
00123 
00124 
00125 /* Protos */
00126 /***************** ISO-2022 ********************************/
00127 U_CFUNC void T_UConverter_toUnicode_ISO_2022(UConverterToUnicodeArgs * args,
00128                                              UErrorCode * err);
00129 
00130 U_CFUNC void T_UConverter_toUnicode_ISO_2022_OFFSETS_LOGIC (UConverterToUnicodeArgs * args,
00131                                                             UErrorCode * err);
00132 
00133 U_CFUNC void T_UConverter_fromUnicode_ISO_2022(UConverterFromUnicodeArgs * args,
00134                                                UErrorCode * err);
00135 
00136 U_CFUNC void T_UConverter_fromUnicode_ISO_2022_OFFSETS_LOGIC (UConverterFromUnicodeArgs * args,
00137                                                               UErrorCode * err);
00138 
00139 U_CFUNC UChar32 T_UConverter_getNextUChar_ISO_2022 (UConverterToUnicodeArgs * args,
00140                                                     UErrorCode * err);
00141 
00142 /***************** ISO-2022-JP ********************************/
00143 U_CFUNC void UConverter_fromUnicode_ISO_2022_JP(UConverterFromUnicodeArgs* args, 
00144                                                 UErrorCode* err);
00145 
00146 U_CFUNC void UConverter_fromUnicode_ISO_2022_JP_OFFSETS_LOGIC(UConverterFromUnicodeArgs* args, 
00147                                                 UErrorCode* err);
00148 
00149 U_CFUNC void UConverter_toUnicode_ISO_2022_JP(UConverterToUnicodeArgs* args, 
00150                                               UErrorCode* err);
00151 
00152 U_CFUNC void UConverter_toUnicode_ISO_2022_JP_OFFSETS_LOGIC(UConverterToUnicodeArgs* args, 
00153                                                             UErrorCode* err);
00154 
00155 U_CFUNC UChar32 UConverter_getNextUChar_ISO_2022_JP (UConverterToUnicodeArgs * args,
00156                                                      UErrorCode * err);
00157 
00158 /***************** ISO-2022-KR ********************************/
00159 U_CFUNC void UConverter_fromUnicode_ISO_2022_KR(UConverterFromUnicodeArgs* args, 
00160                                                 UErrorCode* err);
00161 
00162 U_CFUNC void UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC(UConverterFromUnicodeArgs* args, 
00163                                                               UErrorCode* err);
00164 
00165 U_CFUNC void UConverter_toUnicode_ISO_2022_KR(UConverterToUnicodeArgs* args, 
00166                                               UErrorCode* err);
00167 
00168 U_CFUNC void UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC(UConverterToUnicodeArgs* args, 
00169                                                             UErrorCode* err);
00170 
00171 U_CFUNC UChar32 UConverter_getNextUChar_ISO_2022_KR (UConverterToUnicodeArgs * args,
00172                                                      UErrorCode * err);
00173 
00174 /***************** ISO-2022-CN ********************************/
00175 U_CFUNC void UConverter_fromUnicode_ISO_2022_CN(UConverterFromUnicodeArgs* args, 
00176                                                 UErrorCode* err);
00177 
00178 U_CFUNC void UConverter_fromUnicode_ISO_2022_CN_OFFSETS_LOGIC(UConverterFromUnicodeArgs* args, 
00179                                                 UErrorCode* err);
00180 
00181 U_CFUNC void UConverter_toUnicode_ISO_2022_CN(UConverterToUnicodeArgs* args, 
00182                                               UErrorCode* err);
00183 
00184 U_CFUNC void UConverter_toUnicode_ISO_2022_CN_OFFSETS_LOGIC(UConverterToUnicodeArgs* args, 
00185                                                             UErrorCode* err);
00186 
00187 U_CFUNC UChar32 UConverter_getNextUChar_ISO_2022_CN (UConverterToUnicodeArgs * args,
00188                                                      UErrorCode * err);
00189 
#define ESC_2022 0x1B /* the ESC control character */

/* Classification of a (partial) escape sequence, as stored in escSeqStateTable_Value_2022[]. */
typedef enum {
    /* Doesn't correspond to a valid iso 2022 escape sequence */
    INVALID_2022              = -1,
    /* so far corresponds to a valid iso 2022 escape sequence */
    VALID_NON_TERMINAL_2022   = 0,
    /* corresponds to a valid iso 2022 escape sequence */
    VALID_TERMINAL_2022       = 1,
    /* so far matches one iso 2022 escape sequence, but by adding more
     * characters might match another escape sequence */
    VALID_MAYBE_TERMINAL_2022 = 2,
    VALID_SS2_SEQUENCE        = 3,
    VALID_SS3_SEQUENCE        = 4
} UCNV_TableStates_2022;
00202 
00203 /*
00204 * The way these state transition arrays work is:
00205 * ex : ESC$B is the sequence for JISX208
00206 *      a) First Iteration: char is ESC
00207 *          i) Get the value of ESC from normalize_esq_chars_2022[] with int value of ESC as index
00208 *             int x = normalize_esq_chars_2022[27] which is equal to 1
00209 *         ii) Search for this value in escSeqStateTable_Key_2022[]
00210 *             value of x is stored at escSeqStateTable_Key_2022[0]
00211 *        iii) Save this index as offset
00212 *         iv) Get state of this sequence from escSeqStateTable_Value_2022[]
00213 *             escSeqStateTable_Value_2022[offset], which is VALID_NON_TERMINAL_2022
00214 *     b) Switch on this state and continue to next char
00215 *          i) Get the value of $ from normalize_esq_chars_2022[] with int value of $ as index
00216 *             which is normalize_esq_chars_2022[36] == 4
00217 *         ii) x is currently 1(from above) 
00218 *               x<<=5 -- x is now 32
00219 *               x+=normalize_esq_chars_2022[36]
00220 *               now x is 36
00221 *        iii) Search for this value in escSeqStateTable_Key_2022[]
00222 *             value of x is stored at escSeqStateTable_Key_2022[2], so offset is 2
00223 *         iv) Get state of this sequence from escSeqStateTable_Value_2022[]
00224 *             escSeqStateTable_Value_2022[offset], which is VALID_NON_TERMINAL_2022
00225 *     c) Switch on this state and continue to next char
00226 *        i)  Get the value of B from normalize_esq_chars_2022[] with int value of B as index
00227 *        ii) x is currently 36 (from above) 
00228 *            x<<=5 -- x is now 1152
00229 *            x+=normalize_esq_chars_2022[66]
00230 *            now x is 1161
00231 *       iii) Search for this value in escSeqStateTable_Key_2022[]
00232 *            value of x is stored at escSeqStateTable_Key_2022[23], so offset is 23
00233 *        iv) Get state of this sequence from escSeqStateTable_Value_2022[23]
00234 *            escSeqStateTable_Value_2022[offset], which is VALID_TERMINAL_2022
00235 *         v) Get the converter name from escSeqStateTable_Result_2022[23] which is JISX-208
00236 */     
00237 
00238 
00239 /*Below are the 3 arrays depicting a state transition table*/
/*
 * Maps a byte of an escape sequence to a small code (1..28) that is folded
 * into the lookup key (key = (key << 5) + code).  0 means the byte has no
 * code.  Non-zero entries: ESC(27)=1, '$'=4, '%'=7, '('=2, ')'=24, '*'=26,
 * '+'=27, '-'=3, '.'=23, '/'=6, '@'=5, 'A'..'M'=8..20, 'N'=25, 'O'=28,
 * 'R'=21, 'Z'=22.
 */
int8_t normalize_esq_chars_2022[256] = {
    /*          0   1   2   3   4   5   6   7   8   9 */
    /*   0 */   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    /*  10 */   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    /*  20 */   0,  0,  0,  0,  0,  0,  0,  1,  0,  0,
    /*  30 */   0,  0,  0,  0,  0,  0,  4,  7,  0,  0,
    /*  40 */   2, 24, 26, 27,  0,  3, 23,  6,  0,  0,
    /*  50 */   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    /*  60 */   0,  0,  0,  0,  5,  8,  9, 10, 11, 12,
    /*  70 */  13, 14, 15, 16, 17, 18, 19, 20, 25, 28,
    /*  80 */   0,  0, 21,  0,  0,  0,  0,  0,  0,  0,
    /*  90 */  22,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    /* 100 */   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    /* 110 */   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    /* 120 */   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    /* 130 */   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    /* 140 */   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    /* 150 */   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    /* 160 */   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    /* 170 */   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    /* 180 */   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    /* 190 */   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    /* 200 */   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    /* 210 */   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    /* 220 */   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    /* 230 */   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    /* 240 */   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    /* 250 */   0,  0,  0,  0,  0,  0
};
00270 
#define MAX_STATES_2022 74
/*
 * Keys of every known (partial) escape sequence, in ascending order.
 * The index at which a key is found is the offset into the parallel
 * escSeqStateTable_Result_2022[] and escSeqStateTable_Value_2022[] tables.
 *
 * NOTE(review): only 72 keys are listed for MAX_STATES_2022 (74) slots, so
 * the last two entries are implicitly 0 -- confirm the lookup code tolerates
 * that before relying on the array being sorted over its full length.
 */
int32_t escSeqStateTable_Key_2022[MAX_STATES_2022] = {
    /*  0 */ 1,        34,       36,       39,       55,
    /*  5 */ 57,       60,       1093,     1096,     1097,
    /* 10 */ 1098,     1099,     1100,     1101,     1102,
    /* 15 */ 1103,     1104,     1105,     1106,     1109,
    /* 20 */ 1154,     1157,     1160,     1161,     1176,
    /* 25 */ 1178,     1179,     1254,     1257,     1768,
    /* 30 */ 1773,     35105,    36933,    36936,    36937,
    /* 35 */ 36938,    36939,    36940,    36942,    36943,
    /* 40 */ 36944,    36945,    36946,    36947,    36948,
    /* 45 */ 37640,    37642,    37644,    37646,    37711,
    /* 50 */ 37744,    37745,    37746,    37747,    37748,
    /* 55 */ 40133,    40136,    40138,    40139,    40140,
    /* 60 */ 40141,    1123363,  35947624, 35947625, 35947626,
    /* 65 */ 35947627, 35947629, 35947630, 35947631, 35947635,
    /* 70 */ 35947636, 35947638
};
00284 
00285 
00286 const char* escSeqStateTable_Result_2022[MAX_STATES_2022] = {
00287  /*      0                      1                    2                      3                   4               5                           6                          7                    8                   9    */
00288     
00289          NULL                   ,NULL               ,NULL                ,NULL              ,NULL               ,NULL                   ,NULL                   ,"latin1"               ,"latin1"               ,"latin1"               
00290         ,"ibm-865"              ,"ibm-865"          ,"ibm-865"          ,"ibm-865"          ,"ibm-865"          ,"ibm-865"              ,"JISX-201"             ,"JISX-201"             ,"latin1"               ,"latin1"               
00291         ,NULL                   ,"JISX-208"         ,"gb_2312_80-1"     ,"JISX-208"         ,NULL               ,NULL                   ,NULL                   ,NULL                   ,"UTF8"                 ,"ISO-8859-1"           
00292         ,"ISO-8859-7"           ,NULL               ,"ibm-955"          ,"ibm-367"          ,"ibm-952"          ,"ibm-949"              ,"JISX-212"             ,"ibm-1383"             ,"ibm-952"              ,"ibm-964"              
00293         ,"ibm-964"              ,"ibm-964"          ,"ibm-964"          ,"ibm-964"          ,"ibm-964"          ,"gb_2312_80-1"         ,"ibm-949"              ,"ISO-IR-165"           ,"CNS-11643-1992,1"     ,"CNS-11643-1992,2"     
00294         ,"CNS-11643-1992,3"     ,"CNS-11643-1992,4" ,"CNS-11643-1992,5" ,"CNS-11643-1992,6" ,"CNS-11643-1992,7" ,"UTF16_PlatformEndian" ,"UTF16_PlatformEndian" ,"UTF16_PlatformEndian" ,"UTF16_PlatformEndian" ,"UTF16_PlatformEndian" 
00295         ,"UTF16_PlatformEndian" ,NULL               ,"latin1"           ,"ibm-912"          ,"ibm-913"          ,"ibm-914"              ,"ibm-813"              ,"ibm-1089"             ,"ibm-920"              ,"ibm-915"              
00296         ,"ibm-915"              ,"latin1"
00297 };
00298 
00299 UCNV_TableStates_2022 escSeqStateTable_Value_2022[MAX_STATES_2022] = {
00300     /*          0                           1                         2                             3                           4                           5                               6                        7                          8                           9       */
00301     
00302          VALID_NON_TERMINAL_2022    ,VALID_NON_TERMINAL_2022    ,VALID_NON_TERMINAL_2022    ,VALID_NON_TERMINAL_2022     ,VALID_NON_TERMINAL_2022    ,VALID_SS2_SEQUENCE        ,VALID_SS3_SEQUENCE         ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_MAYBE_TERMINAL_2022  
00303         ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        
00304         ,VALID_NON_TERMINAL_2022    ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_NON_TERMINAL_2022    ,VALID_NON_TERMINAL_2022    ,VALID_NON_TERMINAL_2022    ,VALID_NON_TERMINAL_2022    ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        
00305         ,VALID_TERMINAL_2022        ,VALID_NON_TERMINAL_2022    ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        
00306         ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        
00307         ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        
00308         ,VALID_TERMINAL_2022        ,VALID_NON_TERMINAL_2022    ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        
00309         ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022
00310 };
00311 
00312 
00313 
00314 /*for 2022 looks ahead in the stream
00315 *to determine the longest possible convertible
00316 *data stream
00317 */
00318 static const char* getEndOfBuffer_2022(const char** source,
00319                                        const char* sourceLimit,
00320                                        UBool flush); 
/* Selects which ISO-2022 flavour changeState_2022() is being run for. */
typedef enum {
    ISO_2022    = 0,
    ISO_2022_JP = 1,
    ISO_2022_CN = 2
} Variant2022;
00327 
00328 /*runs through a state machine to determine the escape sequence - codepage correspondence
00329 *changes the pointer pointed to by _this->extraInfo
00330 */
00331 static void changeState_2022(UConverter* _this,
00332                                 const char** source, 
00333                                 const char* sourceLimit,
00334                                 UBool flush,Variant2022 var,int* plane,
00335                                 UErrorCode* err); 
00336 
00337 
00338 UCNV_TableStates_2022 getKey_2022(char source,
00339                                     int32_t* key,
00340                                     int32_t* offset);
00341             
00342 /*********** ISO 2022 Converter Protos ***********/
00343 static void _ISO2022Open(UConverter *cnv, const char *name, const char *locale,uint32_t options, UErrorCode *errorCode);
00344 static void _ISO2022Close(UConverter *converter);
00345 static void _ISO2022Reset(UConverter *converter);
00346 static const char* _ISO2022getName(const UConverter* cnv);
00347 
00348 /************ protos of functions for setting the initial state *********************/
00349 static void setInitialStateToUnicodeJPCN(UConverter* converter,UConverterDataISO2022 *myConverterData);
00350 static void setInitialStateFromUnicodeJPCN(UConverterDataISO2022 *myConverterData);
00351 static void setInitialStateToUnicodeKR(UConverter* converter,UConverterDataISO2022 *myConverterData);
00352 static void setInitialStateFromUnicodeKR(UConverter* converter,UConverterDataISO2022 *myConverterData);
00353   
00354 /*************** Converter implementations ******************/
00355 static const UConverterImpl _ISO2022Impl={
00356     UCNV_ISO_2022,
00357     
00358     NULL,
00359     NULL,
00360     
00361     _ISO2022Open,
00362     _ISO2022Close,
00363     _ISO2022Reset,
00364     
00365     T_UConverter_toUnicode_ISO_2022,
00366     T_UConverter_toUnicode_ISO_2022_OFFSETS_LOGIC,
00367     T_UConverter_fromUnicode_ISO_2022,
00368     T_UConverter_fromUnicode_ISO_2022_OFFSETS_LOGIC,
00369     T_UConverter_getNextUChar_ISO_2022,
00370     
00371     NULL,
00372     _ISO2022getName
00373 };
00374             
00375 const UConverterStaticData _ISO2022StaticData={
00376     sizeof(UConverterStaticData),
00377     "ISO_2022",
00378     2022, 
00379     UCNV_IBM, 
00380     UCNV_ISO_2022, 
00381     1, 
00382     4,
00383     { 0x1a, 0, 0, 0 },
00384     1, 
00385     FALSE, 
00386     FALSE,
00387     { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0} /* reserved */
00388 };
00389             
00390             
00391 const UConverterSharedData _ISO2022Data={
00392     sizeof(UConverterSharedData),
00393     ~((uint32_t) 0),
00394     NULL, 
00395     NULL, 
00396     &_ISO2022StaticData, 
00397     FALSE, 
00398     &_ISO2022Impl, 
00399     0
00400 };
00401 
00402 /*************JP****************/
00403 static const UConverterImpl _ISO2022JPImpl={
00404 
00405     UCNV_ISO_2022,
00406     
00407     NULL,
00408     NULL,
00409     
00410     _ISO2022Open,
00411     _ISO2022Close,
00412     _ISO2022Reset,
00413     
00414     UConverter_toUnicode_ISO_2022_JP,
00415     UConverter_toUnicode_ISO_2022_JP_OFFSETS_LOGIC,
00416     UConverter_fromUnicode_ISO_2022_JP,
00417     UConverter_fromUnicode_ISO_2022_JP_OFFSETS_LOGIC,
00418     UConverter_getNextUChar_ISO_2022_JP,
00419     
00420     NULL,
00421     _ISO2022getName
00422 };
00423 const UConverterSharedData _ISO2022JPData={
00424     sizeof(UConverterSharedData),
00425     ~((uint32_t) 0),
00426     NULL, 
00427     NULL, 
00428     &_ISO2022StaticData, 
00429     FALSE, 
00430     &_ISO2022JPImpl, 
00431     0
00432 };
00433 /************* KR ***************/
00434 static const UConverterImpl _ISO2022KRImpl={
00435 
00436     UCNV_ISO_2022,
00437     
00438     NULL,
00439     NULL,
00440     
00441     _ISO2022Open,
00442     _ISO2022Close,
00443     _ISO2022Reset,
00444     
00445     UConverter_toUnicode_ISO_2022_KR,
00446     UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC,
00447     UConverter_fromUnicode_ISO_2022_KR,
00448     UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC,
00449     UConverter_getNextUChar_ISO_2022_KR,
00450     
00451     NULL,
00452     _ISO2022getName
00453 };
00454 
00455 const UConverterSharedData _ISO2022KRData={
00456     sizeof(UConverterSharedData),
00457     ~((uint32_t) 0),
00458     NULL, 
00459     NULL, 
00460     &_ISO2022StaticData, 
00461     FALSE, 
00462     &_ISO2022KRImpl, 
00463     0
00464 };
00465 /*************** CN ***************/
00466 static const UConverterImpl _ISO2022CNImpl={
00467 
00468     UCNV_ISO_2022,
00469     
00470     NULL,
00471     NULL,
00472     
00473     _ISO2022Open,
00474     _ISO2022Close,
00475     _ISO2022Reset,
00476     
00477     UConverter_toUnicode_ISO_2022_CN,
00478     UConverter_toUnicode_ISO_2022_CN_OFFSETS_LOGIC,
00479     UConverter_fromUnicode_ISO_2022_CN,
00480     UConverter_fromUnicode_ISO_2022_CN_OFFSETS_LOGIC,
00481     UConverter_getNextUChar_ISO_2022_CN,
00482     
00483     NULL,
00484     _ISO2022getName
00485 };
00486 const UConverterSharedData _ISO2022CNData={
00487     sizeof(UConverterSharedData),
00488     ~((uint32_t) 0),
00489     NULL, 
00490     NULL, 
00491     &_ISO2022StaticData, 
00492     FALSE, 
00493     &_ISO2022CNImpl, 
00494     0
00495 };
00496 
00497 /**********/
00498 
00499 static void _ISO2022Open(UConverter *cnv, const char *name, const char *locale,uint32_t options, UErrorCode *errorCode){
00500     
00501     char myLocale[6]={' ',' ',' ',' ',' ',' '};
00502     
00503     cnv->extraInfo = uprv_malloc (sizeof (UConverterDataISO2022));
00504     if(cnv->extraInfo != NULL) {
00505         UConverterDataISO2022 *myConverterData=(UConverterDataISO2022 *) cnv->extraInfo; 
00506         myConverterData->currentConverter = NULL;
00507         myConverterData->fromUnicodeConverter = NULL;
00508         myConverterData->plane = -1;
00509         myConverterData->key =0;
00510         cnv->fromUnicodeStatus =FALSE;
00511         if(locale){
00512             uprv_strcpy(myLocale,locale);
00513             myConverterData->isLocaleSpecified = TRUE;
00514         }
00515         myConverterData->version= 0;
00516         myConverterData->myConverterArray[0] =NULL;
00517         if(myLocale[0]=='j' && (myLocale[1]=='a'|| myLocale[1]=='p') && 
00518             (myLocale[2]=='_' || myLocale[2]=='\0')){
00519             
00520             /* open the required converters and cache them */
00521             myConverterData->myConverterArray[0]=   ucnv_open("ASCII", errorCode );
00522             myConverterData->myConverterArray[1]=   ucnv_open("ISO8859_1", errorCode);
00523             myConverterData->myConverterArray[2]=   ucnv_open("ISO8859_7", errorCode);
00524             myConverterData->myConverterArray[3]=   ucnv_open("jisx-201", errorCode);
00525             myConverterData->myConverterArray[4]=   ucnv_open("jisx-208", errorCode);
00526             myConverterData->myConverterArray[5]=   ucnv_open("jisx-212", errorCode);
00527             myConverterData->myConverterArray[6]=   ucnv_open("gb_2312_80-1", errorCode);
00528             myConverterData->myConverterArray[7]=   ucnv_open("ksc_5601_1", errorCode);
00529             myConverterData->myConverterArray[8]=   NULL;
00530             
00531             /* initialize the state variables */
00532             setInitialStateToUnicodeJPCN(cnv, myConverterData);
00533             setInitialStateFromUnicodeJPCN(myConverterData);
00534 
00535             /*set the substitution chars*/
00536             ucnv_setSubstChars(cnv,"\x1b\x28\x42\x1A", 4, errorCode);
00537             
00538             /* set the function pointers to appropriate funtions */
00539             cnv->sharedData=(UConverterSharedData*)(&_ISO2022JPData);
00540             uprv_strcpy(myConverterData->locale,"ja");
00541             uprv_strcpy(myConverterData->name,"ISO_2022,locale=ja");
00542             if(options){
00543                 switch (options & UCNV_OPTIONS_VERSION_MASK){
00544                     case 0:
00545                         myConverterData->version = 0;
00546                         break;
00547                     case 1:
00548                         myConverterData->version = 1;
00549                         uprv_strcpy(myConverterData->name,"ISO_2022,locale=ja,version=1");
00550                         break;
00551                     case 2:
00552                         myConverterData->version = 2;
00553                         uprv_strcpy(myConverterData->name,"ISO_2022,locale=ja,version=2");
00554                         break;
00555                     default:
00556                         myConverterData->version = 0;
00557                 }
00558             }
00559         }
00560         else if(myLocale[0]=='k' && (myLocale[1]=='o'|| myLocale[1]=='r') && 
00561             (myLocale[2]=='_' || myLocale[2]=='\0')){
00562             
00563             /* initialize the state variables */
00564             setInitialStateToUnicodeKR(cnv, myConverterData);
00565             setInitialStateFromUnicodeKR(cnv,myConverterData);
00566 
00567             myConverterData->fromUnicodeConverter  = ucnv_open("ibm-949",errorCode);
00568 
00569             /*set the substitution chars*/
00570             ucnv_setSubstChars(cnv,"\x0F\x1A", 2, errorCode);
00571             
00572             /* set the function pointers to appropriate funtions */
00573             cnv->sharedData=(UConverterSharedData*)&_ISO2022KRData;
00574 
00575             uprv_strcpy(myConverterData->locale,"kr");
00576             uprv_strcpy(myConverterData->name,"ISO_2022,locale=kr");
00577             myConverterData->version=0;
00578             
00579         }
00580         else if((myLocale[0]=='z'|| myLocale[0]=='c') && (myLocale[1]=='h'|| myLocale[1]=='n') && 
00581             (myLocale[2]=='_' || myLocale[2]=='\0')){
00582             
00583             /* open the required converters and cache them */
00584             myConverterData->myConverterArray[0] = ucnv_open("ASCII",errorCode);
00585             myConverterData->myConverterArray[1] = ucnv_open("gb_2312_80-1",errorCode);
00586             myConverterData->myConverterArray[2] = ucnv_open("ISO-IR-165",errorCode);
00587             myConverterData->myConverterArray[3] = ucnv_open("CNS-11643-1992",errorCode);
00588             myConverterData->myConverterArray[4] = NULL;
00589             
00590             /*initialize the state variables*/
00591             setInitialStateToUnicodeJPCN(cnv, myConverterData);
00592             setInitialStateFromUnicodeJPCN(myConverterData);
00593 
00594             ucnv_setSubstChars(cnv,"\x0F\x1A", 2, errorCode);
00595 
00596             /* set the function pointers to appropriate funtions */
00597             cnv->sharedData=(UConverterSharedData*)&_ISO2022CNData;
00598             uprv_strcpy(myConverterData->locale,"cn");
00599             uprv_strcpy(myConverterData->name,"ISO_2022,locale=cn");
00600 
00601             if(options){
00602                 switch (options  & UCNV_OPTIONS_VERSION_MASK){
00603                     case 0:
00604                         myConverterData->version = 0;
00605                         break;
00606                     case 1:
00607                         myConverterData->version = 1;
00608                         uprv_strcpy(myConverterData->name,"ISO_2022,locale=cn,version=1");
00609                         break;
00610                     default:
00611                         myConverterData->version = 0;
00612                 }
00613             }
00614             
00615             
00616         }
00617         else{
00618             /* append the UTF-8 escape sequence */
00619             cnv->charErrorBufferLength = 3;
00620             cnv->charErrorBuffer[0] = 0x1b;
00621             cnv->charErrorBuffer[1] = 0x25;
00622             cnv->charErrorBuffer[2] = 0x42;
00623 
00624             cnv->sharedData=(UConverterSharedData*)&_ISO2022Data;
00625             /* initialize the state variables */
00626             myConverterData->isLocaleSpecified=FALSE;
00627             uprv_strcpy(myConverterData->name,"ISO_2022");
00628         }
00629                     
00630     } else {
00631         *errorCode = U_MEMORY_ALLOCATION_ERROR;
00632     }
00633     
00634 }
00635 
00636 
00637 static void
00638 _ISO2022Close(UConverter *converter) {
00639    UConverter **array = ((UConverterDataISO2022 *) (converter->extraInfo))->myConverterArray;
00640     
00641     if (converter->extraInfo != NULL) {
00642 
00643         /*close the array of converter pointers and free the memory*/
00644         while(*array!=NULL){
00645             if(*array==((UConverterDataISO2022 *) (converter->extraInfo))->currentConverter){
00646                ((UConverterDataISO2022 *) (converter->extraInfo))->currentConverter=NULL;
00647             }
00648             ucnv_close(*array++);
00649             
00650         }
00651         if(((UConverterDataISO2022 *) (converter->extraInfo))->currentConverter){
00652             ucnv_close(((UConverterDataISO2022 *) (converter->extraInfo))->currentConverter);
00653         }
00654         uprv_free (converter->extraInfo);
00655     }
00656 }
00657 
00658 static void
00659 _ISO2022Reset(UConverter *converter) {
00660     UConverterDataISO2022 *myConverterData=(UConverterDataISO2022 *) (converter->extraInfo);
00661     if(! myConverterData->isLocaleSpecified){
00662         
00663         /* re-append UTF-8 escape sequence */
00664         converter->charErrorBufferLength = 3;
00665         converter->charErrorBuffer[0] = 0x1b;
00666         converter->charErrorBuffer[1] = 0x28;
00667         converter->charErrorBuffer[2] = 0x42;
00668     }
00669     else {
00670         /* reset the state variables */
00671         setInitialStateToUnicodeJPCN(converter, myConverterData);
00672         setInitialStateFromUnicodeJPCN(myConverterData);
00673         setInitialStateToUnicodeKR(converter, myConverterData);
00674         setInitialStateFromUnicodeKR(converter, myConverterData);
00675     }
00676     if (converter->mode == UCNV_SO && !myConverterData->isLocaleSpecified){
00677 
00678         ucnv_close (myConverterData->currentConverter);
00679         myConverterData->currentConverter=NULL;
00680     }
00681 
00682 }
00683 static const char* _ISO2022getName(const UConverter* cnv){
00684     if(cnv->extraInfo){
00685         UConverterDataISO2022* myData= (UConverterDataISO2022*)cnv->extraInfo;
00686         return myData->name;
00687     }
00688     return NULL;
00689 }
00690 
00691 static void setInitialStateToUnicodeJPCN(UConverter* converter,UConverterDataISO2022 *myConverterData ){
00692     myConverterData->toUnicodeCurrentState =ASCII;
00693     myConverterData->targetIndex = 0;
00694     myConverterData->sourceIndex =0;
00695     myConverterData->currentConverter = NULL;
00696     myConverterData->isFirstBuffer = TRUE;
00697     converter->mode = UCNV_SI;
00698 
00699 }
00700 
00701 static void setInitialStateFromUnicodeJPCN(UConverterDataISO2022 *myConverterData){
00702     myConverterData->fromUnicodeCurrentState= ASCII;
00703     myConverterData->targetIndex = 0;
00704     myConverterData->sourceIndex =0;
00705     myConverterData->isEscapeAppended=FALSE;
00706     myConverterData->isShiftAppended=FALSE;
00707     myConverterData->isLocaleSpecified=TRUE;
00708 }
00709 
00710 static void setInitialStateToUnicodeKR(UConverter* converter, UConverterDataISO2022 *myConverterData){
00711 
00712     myConverterData->isLocaleSpecified=TRUE;
00713     converter->mode = UCNV_SI;
00714     myConverterData->currentConverter = myConverterData->fromUnicodeConverter;
00715 
00716 }
00717 
00718 static void setInitialStateFromUnicodeKR(UConverter* converter,UConverterDataISO2022 *myConverterData){
00719    /* in ISO-2022-KR the desginator sequence appears only once
00720     * in a file so we append it only once
00721     */
00722     if( converter->charErrorBufferLength==0){
00723 
00724         converter->charErrorBufferLength = 4;
00725         converter->charErrorBuffer[0] = 0x1b;
00726         converter->charErrorBuffer[1] = 0x24;
00727         converter->charErrorBuffer[2] = 0x29;
00728         converter->charErrorBuffer[3] = 0x43;
00729     }
00730     myConverterData->isLocaleSpecified=TRUE;
00731     myConverterData->isShiftAppended=FALSE;
00732     
00733 }
00734 
00735 /**********************************************************************************
00736 *  ISO-2022 Converter
00737 *
00738 *
00739 */
00740 
00741 U_CFUNC UChar32 T_UConverter_getNextUChar_ISO_2022(UConverterToUnicodeArgs* args,
00742                                                    UErrorCode* err){
00743     const char* mySourceLimit;
00744     int plane=0; /*dummy variable*/
00745     /*Arguments Check*/
00746     if  (args->sourceLimit < args->source){
00747         *err = U_ILLEGAL_ARGUMENT_ERROR;
00748         return 0xffff;
00749     }
00750     
00751     do{
00752         
00753         mySourceLimit = getEndOfBuffer_2022(&(args->source), args->sourceLimit, TRUE); 
00754         /*Find the end of the buffer e.g : Next Escape Seq | end of Buffer*/
00755         if (args->converter->mode == UCNV_SO) /*Already doing some conversion*/{
00756             
00757             return ucnv_getNextUChar(((UConverterDataISO2022*)(args->converter->extraInfo))->currentConverter,
00758                 &(args->source),
00759                 mySourceLimit,
00760                 err);
00761         }
00762         /*-Done with buffer with entire buffer
00763         *-Error while converting
00764         */
00765         changeState_2022(args->converter,
00766                &(args->source), 
00767                args->sourceLimit,
00768                TRUE,
00769                ISO_2022,
00770                &plane,
00771                err);
00772     }while(args->source < args->sourceLimit);
00773     
00774     return 0xffff;
00775 }
00776 
00777 
00778 U_CFUNC void T_UConverter_fromUnicode_ISO_2022(UConverterFromUnicodeArgs *args,
00779                                                UErrorCode* err){
00780     
00781     T_UConverter_fromUnicode_UTF8(args, err);
00782     
00783 }
00784 
00785 
00786 U_CFUNC void T_UConverter_fromUnicode_ISO_2022_OFFSETS_LOGIC(UConverterFromUnicodeArgs* args,
00787                                                              UErrorCode* err){
00788     
00789     char const* targetStart = args->target;
00790     T_UConverter_fromUnicode_UTF8_OFFSETS_LOGIC(args, err);
00791     {
00792         
00793         int32_t len = args->target - targetStart;
00794         int32_t i;
00795         /* uprv_memmove(offsets+3, offsets, len);   MEMMOVE SEEMS BROKEN --srl*/ 
00796         
00797         for(i=len-1;i>=0;i--)       args->offsets[i] = args->offsets[i];
00798     }
00799     
00800 }
00801 
00802 
00803 U_CFUNC void T_UConverter_toUnicode_ISO_2022(UConverterToUnicodeArgs *args,
00804                                              UErrorCode* err){
00805     
00806     const char *mySourceLimit;
00807     char const* sourceStart;
00808     UConverter *saveThis;
00809     int plane =0 ;/*dummy variable*/
00810     /*Arguments Check*/
00811     if (U_FAILURE(*err)) 
00812         return;
00813     
00814     if ((args->converter == NULL) || (args->targetLimit < args->target) || (args->sourceLimit < args->source)){
00815         *err = U_ILLEGAL_ARGUMENT_ERROR;
00816         return;
00817     }
00818     
00819     do{
00820         
00821         /*Find the end of the buffer e.g : Next Escape Seq | end of Buffer*/
00822         mySourceLimit = getEndOfBuffer_2022(&(args->source), args->sourceLimit, args->flush); 
00823         
00824         if (args->converter->mode == UCNV_SO) /*Already doing some conversion*/{
00825             
00826             saveThis = args->converter;
00827             args->offsets = NULL;
00828             args->converter = ((UConverterDataISO2022*)(args->converter->extraInfo))->currentConverter;
00829             ucnv_toUnicode(args->converter,
00830                 &args->target,
00831                 args->targetLimit,
00832                 &args->source,
00833                 mySourceLimit,
00834                 args->offsets,
00835                 args->flush,
00836                 err);
00837             args->converter = saveThis;
00838         }
00839         if((((UConverterDataISO2022 *)args->converter->extraInfo)->isFirstBuffer) && (args->source[0]!=(char)ESC_2022)
00840             &&  (((UConverterDataISO2022*)(args->converter->extraInfo))->currentConverter==NULL)){
00841             
00842             
00843             saveThis = args->converter;
00844             args->offsets = NULL;
00845             ((UConverterDataISO2022*)(args->converter->extraInfo))->currentConverter = ucnv_open("ASCII",err);
00846             
00847             if(U_FAILURE(*err)){
00848                 break;
00849             }
00850             
00851             args->converter = ((UConverterDataISO2022*)(args->converter->extraInfo))->currentConverter;
00852             ucnv_toUnicode(args->converter,
00853                 &args->target,
00854                 args->targetLimit,
00855                 &args->source,
00856                 mySourceLimit,
00857                 args->offsets,
00858                 args->flush,
00859                 err);
00860             args->converter = saveThis;
00861             args->converter->mode = UCNV_SO;
00862             ((UConverterDataISO2022*)(args->converter->extraInfo))->isFirstBuffer=FALSE;
00863             
00864         }
00865         
00866         /*-Done with buffer with entire buffer
00867         -Error while converting
00868         */
00869         
00870         if (U_FAILURE(*err) || (args->source == args->sourceLimit)) 
00871             return;
00872         
00873         sourceStart = args->source;
00874         changeState_2022(args->converter,
00875                &(args->source), 
00876                args->sourceLimit,
00877                TRUE,
00878                ISO_2022,
00879                &plane,
00880                err);
00881         /* args->source = sourceStart; */
00882         
00883         
00884     }while(args->source < args->sourceLimit);
00885     
00886     ((UConverterDataISO2022*)(args->converter->extraInfo))->isFirstBuffer=FALSE;
00887     
00888     return;
00889 }
00890 
00891 U_CFUNC void T_UConverter_toUnicode_ISO_2022_OFFSETS_LOGIC(UConverterToUnicodeArgs* args,
00892                                                            UErrorCode* err){
00893     
00894     int32_t myOffset=0;
00895     int32_t base = 0;
00896     const char* mySourceLimit;
00897     char const* sourceStart;
00898     UConverter* saveThis = NULL;
00899     int plane =0;/*dummy variable*/
00900     /*Arguments Check*/
00901     if (U_FAILURE(*err)) 
00902         return;
00903     if ((args->converter == NULL) || (args->targetLimit < args->target) || (args->sourceLimit < args->source)){
00904         *err = U_ILLEGAL_ARGUMENT_ERROR;
00905         return;
00906     }
00907     
00908     do{
00909         mySourceLimit = getEndOfBuffer_2022(&(args->source), args->sourceLimit, args->flush); 
00910         /*Find the end of the buffer e.g : Next Escape Seq | end of Buffer*/
00911         
00912         if (args->converter->mode == UCNV_SO) /*Already doing some conversion*/{
00913             const UChar* myTargetStart = args->target;
00914             
00915             saveThis = args->converter;
00916             args->converter = ((UConverterDataISO2022*)(saveThis->extraInfo))->currentConverter;
00917             ucnv_toUnicode(args->converter, 
00918                 &(args->target),
00919                 args->targetLimit,
00920                 &(args->source),
00921                 mySourceLimit,
00922                 args->offsets,
00923                 args->flush,
00924                 err);
00925             
00926             args->converter = saveThis;
00927             {
00928                 int32_t lim =  args->target - myTargetStart;
00929                 int32_t i = 0;
00930                 for (i=base; i < lim;i++){   
00931                     args->offsets[i] += myOffset;
00932                 }
00933                 base += lim;
00934             }
00935             
00936         }
00937         if(((UConverterDataISO2022 *)args->converter->extraInfo)->isFirstBuffer && args->source[0]!=ESC_2022
00938             && ((UConverterDataISO2022*)(args->converter->extraInfo))->currentConverter==NULL){
00939             
00940             const UChar* myTargetStart = args->target;
00941             UConverter* saveThis = args->converter;
00942             args->offsets = NULL;
00943             ((UConverterDataISO2022*)(args->converter->extraInfo))->currentConverter = ucnv_open("ASCII",err);
00944             
00945             if(U_FAILURE(*err)){
00946                 break;
00947             }
00948             
00949             args->converter = ((UConverterDataISO2022*)(args->converter->extraInfo))->currentConverter;
00950             ucnv_toUnicode(args->converter,
00951                 &args->target,
00952                 args->targetLimit,
00953                 &args->source,
00954                 mySourceLimit,
00955                 args->offsets,
00956                 args->flush,
00957                 err);
00958             args->converter = saveThis;
00959             args->converter->mode = UCNV_SO;
00960             ((UConverterDataISO2022*)(args->converter->extraInfo))->isFirstBuffer=FALSE;
00961             args->converter = saveThis;
00962             {
00963                 int32_t lim =  args->target - myTargetStart;
00964                 int32_t i = 0;
00965                 for (i=base; i < lim;i++){   
00966                     args->offsets[i] += myOffset;
00967                 }
00968                 base += lim;
00969             }
00970         }
00971         /*-Done with buffer with entire buffer
00972         -Error while converting
00973         */
00974         
00975         if (U_FAILURE(*err) || (args->source == args->sourceLimit)) 
00976             return;
00977         
00978         sourceStart = args->source;
00979         changeState_2022(args->converter,
00980                &(args->source), 
00981                args->sourceLimit,
00982                TRUE,
00983                ISO_2022,
00984                &plane,
00985                err);
00986         myOffset += args->source - sourceStart;
00987         
00988     }while(mySourceLimit != args->sourceLimit);
00989     
00990     return;
00991 }
00992 UCNV_TableStates_2022 getKey_2022(char c,
00993                                   int32_t* key,
00994                                   int32_t* offset){
00995     int32_t togo = *key;
00996     int32_t low = 0;
00997     int32_t hi = MAX_STATES_2022;
00998     int32_t oldmid=0;
00999     
01000     if (*key == 0){
01001         togo = (int8_t)normalize_esq_chars_2022[(int)c];
01002     }
01003     else{
01004         togo <<= 5;
01005         togo += (int8_t)normalize_esq_chars_2022[(int)c];
01006     }
01007     
01008     while (hi != low)  /*binary search*/{
01009         
01010         register int32_t mid = (hi+low) >> 1; /*Finds median*/
01011         
01012         if (mid == oldmid) 
01013             break;
01014         
01015         if (escSeqStateTable_Key_2022[mid] > togo){
01016             hi = mid;
01017         }
01018         else if (escSeqStateTable_Key_2022[mid] < togo){  
01019             low = mid;
01020         }
01021         else /*we found it*/{
01022             *key = togo;
01023             *offset = mid;
01024             return escSeqStateTable_Value_2022[mid];
01025         }
01026         oldmid = mid;
01027         
01028     }
01029     
01030     *key = 0;
01031     *offset = 0;
01032     return INVALID_2022;
01033 }
01034 
01035 
01036 
01037 /*Checks the characters of the buffer against valid 2022 escape sequences
01038 *if the match we return a pointer to the initial start of the sequence otherwise
01039 *we return sourceLimit
01040 */
01041 static const char* getEndOfBuffer_2022(const char** source,
01042                                        const char* sourceLimit,
01043                                        UBool flush){
01044     
01045     const char* mySource = *source;
01046     
01047     if (*source >= sourceLimit) 
01048         return sourceLimit;
01049     
01050     do{
01051         
01052         if (*mySource == ESC_2022){
01053             int8_t i;
01054             int32_t key = 0;
01055             int32_t offset;
01056             UCNV_TableStates_2022 value = VALID_NON_TERMINAL_2022;
01057             
01058             /* Kludge: I could not
01059             * figure out the reason for validating an escape sequence
01060             * twice - once here and once in changeState_2022(). 
01061             * is it possible to have an ESC character in a ISO2022
01062             * byte stream which is valid in a code page? Is it legal?
01063             */
01064             for (i=0; 
01065             (mySource+i < sourceLimit)&&(value == VALID_NON_TERMINAL_2022);
01066             i++) {
01067                 value =  getKey_2022(*(mySource+i), &key, &offset);
01068             }
01069             if (value > 0 || *mySource==ESC_2022) 
01070                 return mySource;
01071             
01072             if ((value == VALID_NON_TERMINAL_2022)&&(!flush) ) 
01073                 return sourceLimit;
01074         }
01075         else if(*mySource == (char)UCNV_SI || *mySource==(char)UCNV_SO){
01076             return mySource;
01077             
01078         }
01079         
01080     }while (mySource++ < sourceLimit);
01081     
01082     return sourceLimit;
01083 }
01084 
01085 
01086 /**************************************ISO-2022-JP*************************************************/
01087 
01088 /************************************** IMPORTANT **************************************************
01089 * The UConverter_fromUnicode_ISO2022_JP converter does not use ucnv_fromUnicode() functions for SBCS,DBCS and
01090 * MBCS instead the values are obtained directly by accessing the sharedData structs through ucmp8_getU() 
01091 * ucmp16_getU() macros,and for MBCS by emulating the Markus's code to increase speed, reduce the 
01092 * overhead of function call and make it efficient.The converter iterates over each Unicode codepoint 
01093 * to obtain the equivalent codepoints from the codepages supported. Since the source buffer is 
01094 * processed one char at a time it would make sense to reduce the extra processing a canned converter 
01095 * would do as far as possible.
01096 *
01097 * If the implementation of these macros or structure of sharedData struct change in the future, make 
01098 * sure that ISO-2022 is also changed. 
01099 ***************************************************************************************************
01100 */
01101 
01102 /***************************************************************************************************
01103 * Rules for ISO-2022-jp encoding
01104 * (i)   Escape sequences must be fully contained within a line they should not 
01105 *       span new lines or CRs
01106 * (ii)  If the last character on a line is represented by two bytes then an ASCII or
01107 *       JIS-Roman character escape sequence should follow before the line terminates
01108 * (iii) If the first character on the line is represented by two bytes then a two 
01109 *       byte character escape sequence should precede it    
01110 * (iv)  If no escape sequence is encountered then the characters are ASCII
01111 * (v)   Latin(ISO-8859-1) and Greek(ISO-8859-7) characters must be designated to G2,
01112 *       and invoked with SS2 (ESC N).
01113 * (vi)  If there is any G0 designation in text, there must be a switch to
01114 *       ASCII or to JIS X 0201-Roman before a space character (but not
01115 *       necessarily before "ESC 4/14 2/0" or "ESC N ' '") or control
01116 *       characters such as tab or CRLF.
01117 * (vii) Supported encodings:
01118 *          ASCII, JISX201, JISX208, JISX212, GB2312, KSC5601, ISO-8859-1,ISO-8859-7
01119 *
01120 *  source : RFC-1554
01121 *
01122 *          JISX201, JISX208,JISX212 : new .cnv data files created
01123 *          KSC5601 : alias to ibm-949 mapping table
01124 *          GB2312 : alias to ibm-1386 mapping table    
01125 *          ISO-8859-1 : Algorithmic implemented as LATIN1 case
01126 *          ISO-8859-7 : alias to ibm-9409 mapping table
01127 */
01128 
01129 static Cnv2022Type myConverterType[8]={
01130     ASCII1,
01131     LATIN1,
01132     SBCS,
01133     SBCS,
01134     DBCS,
01135     DBCS,
01136     DBCS,
01137     DBCS,
01138         
01139 };
01140 
01141 static StateEnum nextStateArray[3][8]= {
01142     {JISX201,INVALID_STATE,INVALID_STATE,JISX208,ASCII,INVALID_STATE,INVALID_STATE,INVALID_STATE},
01143     {JISX201,INVALID_STATE,INVALID_STATE,JISX208,JISX212,ASCII,INVALID_STATE,INVALID_STATE},
01144     {ISO8859_1,ISO8859_7,JISX201,JISX208,JISX212,GB2312,KSC5601,ASCII}
01145 };
/*
 * Escape sequence that designates each supported character set,
 * in state order (entry 0 is ASCII).
 */
static const char* escSeqChars[8] = {
    /* ASCII      : <ESC>(B  */ "\x1B\x28\x42",
    /* ISO-8859-1 : <ESC>.A  */ "\x1B\x2E\x41",
    /* ISO-8859-7 : <ESC>.F  */ "\x1B\x2E\x46",
    /* JISX-201   : <ESC>(J  */ "\x1B\x28\x4A",
    /* JISX-208   : <ESC>$B  */ "\x1B\x24\x42",
    /* JISX-212   : <ESC>$(D */ "\x1B\x24\x28\x44",
    /* GB2312     : <ESC>$A  */ "\x1B\x24\x41",
    /* KSC5601    : <ESC>$(C */ "\x1B\x24\x28\x43"
};
01157 
01158 
01159 static void concatChar(UConverterFromUnicodeArgs* args, int32_t *targetIndex, int32_t *targetLength,
01160                        int8_t charToAppend,UErrorCode* err,int32_t *sourceIndex);
01161 
01162 static void concatEscape(UConverterFromUnicodeArgs* args, int32_t *targetIndex, int32_t *targetLength,
01163                          const char* strToAppend,UErrorCode* err,int len,int32_t *sourceIndex);
01164 
01165 static void concatString(UConverterFromUnicodeArgs* args, int32_t *targetIndex, int32_t *targetLength,
01166                          const UChar32* strToAppend,UErrorCode* err,int32_t *sourceIndex);
01167 
01168 /*
01169 * The iteration over various code pages works this way:
01170 * i)   Get the currentState from myConverterData->currentState
01171 * ii)  Check if the character is mapped to a valid character in the currentState
01172 *      Yes ->  a) set the initIterState to currentState
01173 *       b) remain in this state until an invalid character is found
01174 *      No  ->  a) go to the next code page and find the character
01175 * iii) Before changing the state, increment the current state and check whether it
01176 *      is equal to the initial iteration state (initIterState)
01177 *      Yes ->  A character that cannot be represented in any of the supported encodings
01178 *       break and return a U_INVALID_CHARACTER error
01179 *      No  ->  Continue and find the character in next code page
01180 *
01181 *
01182 * TODO: Implement a priority technique where the users are allowed to set the priority of code pages 
01183 */
01184 
01185 
01186 U_CFUNC void UConverter_fromUnicode_ISO_2022_JP(UConverterFromUnicodeArgs* args, UErrorCode* err){
01187 
01188     UChar* mySource =(UChar*)args->source;
01189     UConverterDataISO2022 *myConverterData=(UConverterDataISO2022*)args->converter->extraInfo;
01190     UConverterCallbackReason reason;
01191     UBool isEscapeAppended = FALSE;
01192     StateEnum initIterState;
01193     unsigned char *myTarget = (unsigned char *) args->target; 
01194     const UChar *saveSource;
01195     char *saveTarget;
01196     int32_t myTargetLength = args->targetLimit - args->target;
01197     int32_t mySourceLength = args->sourceLimit - args->source;
01198     int32_t mySourceIndex = 0;
01199     int32_t myTargetIndex = 0;
01200     CompactShortArray *myFromUnicodeDBCS = NULL;
01201     CompactShortArray *myFromUnicodeDBCSFallback = NULL;
01202     CompactByteArray  *myFromUnicodeSBCS = NULL;
01203     CompactByteArray  *myFromUnicodeSBCSFallback = NULL;
01204     UChar32 targetUniChar = missingCharMarker;
01205     StateEnum currentState=ASCII;
01206     Cnv2022Type myType =ASCII1;
01207     UChar32 mySourceChar = 0x0000;
01208     int iterCount = 0;
01209     const char *escSeq = NULL;
01210     UBool isShiftAppended = FALSE;
01211     UBool isTargetUCharDBCS=FALSE,oldIsTargetUCharDBCS=FALSE; 
01212     isEscapeAppended =(UBool) myConverterData->isEscapeAppended;
01213     isShiftAppended =(UBool) myConverterData->isShiftAppended;
01214     initIterState =ASCII;
01215 
01216     /* arguments check*/
01217     if ((args->converter == NULL) || (args->targetLimit < args->target) || (args->sourceLimit < args->source)){
01218         *err = U_ILLEGAL_ARGUMENT_ERROR;
01219         return;
01220     }
01221     initIterState = myConverterData->fromUnicodeCurrentState;
01222     if(args->converter->fromUSurrogateLead!=0 && myTargetIndex < myTargetLength) {
01223         goto getTrail;
01224     }
01225     while(mySourceIndex <  mySourceLength){
01226         currentState = myConverterData->fromUnicodeCurrentState;
01227         myConverterData->fromUnicodeConverter = (myConverterData->fromUnicodeConverter == NULL) ?
01228             myConverterData->myConverterArray[0] :
01229         myConverterData->myConverterArray[(int)myConverterData->fromUnicodeCurrentState];
01230         isTargetUCharDBCS   = (UBool) args->converter->fromUnicodeStatus;
01231         
01232         if(myTargetIndex < myTargetLength){
01233             
01234             mySourceChar = (UChar) args->source[mySourceIndex++];
01235             
01236             myType= (Cnv2022Type) myConverterType[currentState];
01237             
01238             /* I am handling surrogates in the begining itself so that I donot have to go through 8 
01239             * iterations on codepages that we support. Adapted from MBCS 
01240             */
01241             if(UTF_IS_SURROGATE(mySourceChar)) {
01242                 if(UTF_IS_SURROGATE_FIRST(mySourceChar)) {
01243                     args->converter->fromUSurrogateLead=(UChar)mySourceChar;
01244 getTrail:
01245                     /*look ahead to find the trail surrogate*/
01246                     if(mySourceIndex <  mySourceLength) {
01247                         /* test the following code unit */
01248                         UChar trail=(UChar) args->source[mySourceIndex];
01249                         if(UTF_IS_SECOND_SURROGATE(trail)) {
01250                             ++mySourceIndex;
01251                             mySourceChar=UTF16_GET_PAIR_VALUE(mySourceChar, trail);
01252                             args->converter->fromUSurrogateLead=0x00;
01253                             /* convert this surrogate code point */
01254                             /* exit this condition tree */
01255                         } else {
01256                             /* this is an unmatched lead code unit (1st surrogate) */
01257                             /* callback(illegal) */
01258                             reason=UCNV_ILLEGAL;
01259                             *err=U_ILLEGAL_CHAR_FOUND;
01260                             goto CALLBACK;
01261                         }
01262                     } else {
01263                         /* no more input */
01264                         break;
01265                     }
01266                 } else {
01267                     /* this is an unmatched trail code unit (2nd surrogate) */
01268                     /* callback(illegal) */
01269                     reason=UCNV_ILLEGAL;
01270                     *err=U_ILLEGAL_CHAR_FOUND;
01271                     goto CALLBACK;
01272                 }
01273             }
01274             /*Do the conversion*/
01275             if(mySourceChar == 0x0020){
01276                 
01277                 if(currentState > 2){
01278                     concatEscape(args, &myTargetIndex, &myTargetLength, escSeqChars[0],err,strlen(escSeqChars[0]),&mySourceIndex);
01279                     
01280                     isTargetUCharDBCS=FALSE;
01281                 }
01282                 concatString(args, &myTargetIndex, &myTargetLength,&mySourceChar,err,&mySourceIndex);
01283                 myConverterData->isEscapeAppended=isEscapeAppended =FALSE;
01284                 TEST_ERROR_CONDITION(args,myTargetIndex, mySourceIndex, isTargetUCharDBCS,myConverterData, err);
01285                 continue;
01286             }
01287             /* if the source character is CR or LF then append the ASCII escape sequence*/
01288             else if(mySourceChar== 0x000A || mySourceChar== 0x000D || mySourceChar==0x0009 || mySourceChar==0x000B){
01289                 
01290                 if((isTargetUCharDBCS || currentState==JISX201) &&  mySource[mySourceIndex-2]!=0x000D){
01291                     concatEscape(args, &myTargetIndex, &myTargetLength, escSeqChars[0],err,strlen(escSeqChars[0]),&mySourceIndex);
01292                     isTargetUCharDBCS=FALSE;
01293                     isShiftAppended =FALSE;
01294                     myConverterData->isEscapeAppended=isEscapeAppended=FALSE;
01295                     myConverterData->isShiftAppended=FALSE;
01296                     
01297                 }
01298                 
01299                 concatString(args, &myTargetIndex, &myTargetLength,&mySourceChar,err,&mySourceIndex);
01300                 
01301                 if(currentState==ISO8859_1 || currentState ==ISO8859_7)
01302                     isEscapeAppended =FALSE;
01303                 
01304                 TEST_ERROR_CONDITION(args,myTargetIndex, mySourceIndex, isTargetUCharDBCS,myConverterData, err);
01305                 
01306                 continue;
01307             }
01308             else{
01309                 
01310                 do{
01311                     switch (myType){
01312                     
01313                         case SBCS:
01314                             if( mySourceChar <0xffff) {
01315                                 myFromUnicodeSBCS = &myConverterData->fromUnicodeConverter->sharedData->table->sbcs.fromUnicode;
01316                                 myFromUnicodeSBCSFallback = &myConverterData->fromUnicodeConverter->sharedData->table->sbcs.fromUnicodeFallback;
01317                     
01318                                 targetUniChar = (UChar32) ucmp8_getu (myFromUnicodeSBCS, mySourceChar);
01319                                
01320                                 /* There are no fallbacks in ISO_8859_1, ISO_8859_7,JISX201 so we can
01321                                  * safely ignore the codepaths below
01322                                  */
01323                                 /*if ((targetUniChar==0) && UCNV_FROM_U_USE_FALLBACK(args->converter, mySourceChar) &&
01324                                     (myConverterData->fromUnicodeConverter->sharedData->staticData->hasFromUnicodeFallback == TRUE)){
01325                                     targetUniChar = (UChar32) ucmp8_getu (myFromUnicodeSBCSFallback, mySourceChar);
01326                                 } */
01327                                 /* ucmp8_getU returns 0 for missing char so explicitly set it missingCharMarker*/
01328                                 targetUniChar=(UChar)((targetUniChar==0) ? (UChar) missingCharMarker : targetUniChar);
01329                             }
01330                             break;
01331                     
01332                         case DBCS:
01333                             if(mySourceChar < 0xffff){
01334                                 myFromUnicodeDBCS = &myConverterData->fromUnicodeConverter->sharedData->table->dbcs.fromUnicode;
01335                                 myFromUnicodeDBCSFallback = &myConverterData->fromUnicodeConverter->sharedData->table->dbcs.fromUnicodeFallback;
01336                                 targetUniChar = (UChar) ucmp16_getu (myFromUnicodeDBCS, mySourceChar);
01337                                 
01338                                 /* There are no fallbacks in JISX208,JISX212, KSC5601,GB2312 so we can
01339                                  * safely ignore the codepaths below
01340                                  */
01341                                 /*
01342                                 if ((targetUniChar==missingCharMarker) && UCNV_FROM_U_USE_FALLBACK(args->converter, mySourceChar) &&
01343                                     (myConverterData->fromUnicodeConverter->sharedData->staticData->hasFromUnicodeFallback == TRUE)){
01344                                     targetUniChar = (UChar) ucmp16_getu (myFromUnicodeDBCSFallback, mySourceChar);
01345                                 } */
01346                             }
01347                             break;
01348 
01349                         case LATIN1:
01350                             if(mySourceChar < 0x0100){
01351                                 targetUniChar = mySourceChar;
01352                             } 
01353                             else 
01354                                 targetUniChar = missingCharMarker;
01355                             break;
01356 
01357                         case ASCII1:
01358                             if(mySourceChar < 0x7f){
01359                                 targetUniChar = mySourceChar;
01360                             }
01361                             else 
01362                                 targetUniChar = missingCharMarker;
01363                             break;
01364                         default:
01365                             /*not expected */ 
01366                             break;
01367                     }
01368                     if(targetUniChar==missingCharMarker){
01369                         isEscapeAppended = FALSE; 
01370                         /* save the state */
01371                         myConverterData->fromUnicodeCurrentState=nextStateArray[myConverterData->version][currentState];
01372                         myConverterData->isEscapeAppended = isEscapeAppended;
01373                         myConverterData->isShiftAppended =isShiftAppended;
01374                         args->converter->fromUnicodeStatus = isTargetUCharDBCS;
01375                        /* myConverterData->sourceIndex = mySourceIndex;
01376                         myConverterData->targetIndex = myTargetIndex;*/
01377                         currentState = myConverterData->fromUnicodeCurrentState;
01378                         myConverterData->fromUnicodeConverter = (myConverterData->fromUnicodeConverter == NULL) ?
01379                         myConverterData->myConverterArray[0] :
01380                                         myConverterData->myConverterArray[(int)myConverterData->fromUnicodeCurrentState];
01381                         isTargetUCharDBCS   = (UBool) args->converter->fromUnicodeStatus;
01382                         myType= (Cnv2022Type) myConverterType[currentState];
01383                     }
01384 
01385                 }while(targetUniChar==missingCharMarker && initIterState != currentState);
01386 
01387             }
01388             
01389             if(targetUniChar!= missingCharMarker){
01390                 
01391                 oldIsTargetUCharDBCS = isTargetUCharDBCS;
01392                 isTargetUCharDBCS =(UBool) (targetUniChar >0x00FF);
01393                 args->converter->fromUnicodeStatus= isTargetUCharDBCS;
01394                 /* set the iteration state and iteration count  */
01395                 initIterState = currentState;
01396                 iterCount =0;
01397                 /* Append the escpace sequence */
01398                 if(!isEscapeAppended){
01399                     escSeq = escSeqChars[(int)currentState];
01400                     concatEscape(args, &myTargetIndex, &myTargetLength, 
01401                         escSeqChars[(int)currentState],
01402                         err,strlen(escSeqChars[(int)currentState]),&mySourceIndex);
01403                     
01404                     isEscapeAppended =TRUE;
01405                     myConverterData->isEscapeAppended=TRUE;
01406                     
01407                     /* Append SSN for shifting to G2 */
01408                     if(currentState==ISO8859_1 || currentState==ISO8859_7){
01409 
01410                         concatEscape(args, &myTargetIndex, &myTargetLength,
01411                             UCNV_SS2,err,strlen(UCNV_SS2),&mySourceIndex);
01412                     }
01413                 }
01414 
01415                 concatString(args, &myTargetIndex, &myTargetLength,
01416                     &targetUniChar,err, &mySourceIndex);
01417                 
01418                 TEST_ERROR_CONDITION(args,myTargetIndex, mySourceIndex, isTargetUCharDBCS,myConverterData, err);
01419                 
01420             }/* end of end if(targetUniChar==missingCharMarker)*/
01421             else{
01422                 
01423                 /* if we cannot find the character after checking all codepages 
01424                  * then this is an error
01425                  */
01426                     reason = UCNV_UNASSIGNED;
01427                     *err = U_INVALID_CHAR_FOUND;
01428                     args->converter->invalidUCharBuffer[0]=(UChar)mySourceChar;
01429                     args->converter->invalidUCharLength++;
01430                     
01431 CALLBACK:
01432                     saveSource = args->source;
01433                     saveTarget = args->target;
01434      
01435                     args->target = (char*)myTarget + myTargetIndex;
01436                     args->source = mySource + mySourceIndex;
01437                     myConverterData->isShiftAppended =isShiftAppended;
01438                     args->converter->fromUnicodeStatus = isTargetUCharDBCS;
01439                             
01440                     args->converter->fromUnicodeStatus = (int32_t)isTargetUCharDBCS;
01441 
01442                     FromU_CALLBACK_MACRO(args->converter->fromUContext,
01443                         args,
01444                         args->converter->invalidUCharBuffer,
01445                         args->converter->invalidUCharLength,
01446                         (UChar32) (args->converter->invalidUCharLength == 2 ? 
01447                         UTF16_GET_PAIR_VALUE(args->converter->invalidUCharBuffer[0], 
01448                         args->converter->invalidUCharBuffer[1]) 
01449                         : args->converter->invalidUCharBuffer[0]),
01450                         reason,
01451                         err);
01452 
01453                     myConverterData->isEscapeAppended = isEscapeAppended=FALSE;
01454                     args->source=saveSource;
01455                     args->target=saveTarget;
01456                     args->converter->fromUSurrogateLead=0x00;
01457                     initIterState = myConverterData->fromUnicodeCurrentState;
01458                     isTargetUCharDBCS  = (UBool)(args->converter->fromUnicodeStatus);
01459                     args->converter->invalidUCharLength = 0;
01460                     if (U_FAILURE (*err)){
01461                         break;
01462                     }
01463                     
01464             }
01465             targetUniChar =missingCharMarker;
01466         } /* end if(myTargetIndex<myTargetLength) */
01467         else{
01468             *err =U_BUFFER_OVERFLOW_ERROR;
01469             break;
01470         } 
01471         
01472     }/* end while(mySourceIndex<mySourceLength) */
01473 
01474     if (args->converter->fromUSurrogateLead !=0 && (mySourceIndex == mySourceLength) && args->flush){
01475         if (U_SUCCESS(*err) ){
01476             *err = U_TRUNCATED_CHAR_FOUND;
01477             args->converter->toUnicodeStatus = 0x00;
01478         }
01479     }
01480     /* Reset the state of converter if we consumed 
01481      * the source and flush is true
01482      */
01483     if( (mySourceIndex == mySourceLength) && args->flush){
01484         setInitialStateFromUnicodeJPCN(myConverterData);
01485     }
01486     /*save the state and return */
01487     args->target += myTargetIndex;
01488     args->source += mySourceIndex;
01489     myConverterData->sourceIndex = 0;
01490     myConverterData->targetIndex = 0;
01491     args->converter->fromUnicodeStatus = isTargetUCharDBCS;
01492     
01493 }
/*
 * Converts the UTF-16 code units in [args->source, args->sourceLimit) to
 * ISO-2022-JP bytes written to [args->target, args->targetLimit).  All output
 * goes through concatString()/concatEscape(), which also record a source
 * index for every byte stored in the target when args->offsets is non-NULL.
 *
 * For each code unit the function
 *   1. pairs up surrogates (or reports an unmatched one through the callback),
 *   2. handles U+0020 and the controls TAB/LF/VT/CR specially,
 *   3. otherwise looks the character up in the table of the current state and,
 *      while it is missing, steps through nextStateArray[version][state]
 *      until a mapping is found or the walk returns to initIterState,
 *   4. emits the escape sequence for the state (if not already emitted)
 *      followed by the one or two target bytes, or invokes the from-Unicode
 *      callback when no state can represent the character.
 *
 * On a normal exit args->target and args->source are advanced by the number
 * of bytes written / code units consumed and the DBCS flag is stored in
 * args->converter->fromUnicodeStatus.
 *
 * *err is set here to U_ILLEGAL_ARGUMENT_ERROR (bad arguments),
 * U_ILLEGAL_CHAR_FOUND (unmatched surrogate), U_INVALID_CHAR_FOUND (no
 * mapping), U_BUFFER_OVERFLOW_ERROR (target full) or U_TRUNCATED_CHAR_FOUND
 * (input ends after a lead surrogate while args->flush is set).
 *
 * NOTE(review): the callback macro FromU_CALLBACK_OFFSETS_LOGIC_MACRO is not
 * defined in this part of the file; it may reference locals of this function
 * by name (e.g. currentOffset), so do not rename or remove locals without
 * checking its definition.
 */
01494 U_CFUNC void UConverter_fromUnicode_ISO_2022_JP_OFFSETS_LOGIC(UConverterFromUnicodeArgs* args, UErrorCode* err){
01495     UChar* mySource =(UChar*)args->source;
01496     
          /* NOTE(review): args->converter is dereferenced here, before the
           * NULL check in the "arguments check" below. */
01497     UConverterDataISO2022 *myConverterData=(UConverterDataISO2022*)args->converter->extraInfo;
01498     UConverterCallbackReason reason;
01499     UBool isEscapeAppended = FALSE;
01500     StateEnum initIterState;
01501     unsigned char *myTarget = (unsigned char *) args->target; 
01502     const UChar *saveSource;
01503     char *saveTarget;
01504     int32_t *saveOffsets ;
01505     int32_t myTargetLength = args->targetLimit - args->target;
01506     int32_t mySourceLength = args->sourceLimit - args->source;
01507     int32_t mySourceIndex = 0;
01508     int32_t myTargetIndex = 0;
01509     CompactShortArray *myFromUnicodeDBCS = NULL;
01510     CompactShortArray *myFromUnicodeDBCSFallback = NULL;
01511     CompactByteArray  *myFromUnicodeSBCS = NULL;
01512     CompactByteArray  *myFromUnicodeSBCSFallback = NULL;
01513     UChar32 targetUniChar = missingCharMarker;
01514     StateEnum currentState=ASCII;
01515     Cnv2022Type myType=ASCII1;
01516     UChar32 mySourceChar = 0x0000;
01517     int iterCount = 0;
01518     int32_t currentOffset;
01519     const char *escSeq = NULL;
01520     UBool isShiftAppended = FALSE;
01521     UBool isTargetUCharDBCS=FALSE,oldIsTargetUCharDBCS=FALSE; 
          /* pick up the escape/shift flags left by the previous call */
01522     isEscapeAppended =(UBool) myConverterData->isEscapeAppended;
01523     isShiftAppended =(UBool) myConverterData->isShiftAppended;
01524     initIterState =ASCII;
01525 
01526     /* arguments check*/
01527     if ((args->converter == NULL) || (args->targetLimit < args->target) || (args->sourceLimit < args->source)){
01528         *err = U_ILLEGAL_ARGUMENT_ERROR;
01529         return;
01530     }
          /* the state search for a character starts, and gives up, at this state */
01531     initIterState = myConverterData->fromUnicodeCurrentState;
          /* A lead surrogate is pending from an earlier call: resume at the
           * trail lookup.
           * NOTE(review): on this path mySourceChar is still 0x0000 (not the
           * saved lead) and currentState/myType keep their initial values
           * because the per-iteration setup at the top of the loop is skipped
           * -- confirm this is intended. */
01532     if(args->converter->fromUSurrogateLead!=0 && myTargetIndex < myTargetLength) {
01533         goto getTrail;
01534     }
01535     while(mySourceIndex <  mySourceLength){
              /* reload the state and matching sub-converter for this code unit;
               * myConverterArray[0] is used if no sub-converter was selected yet */
01536         currentState = myConverterData->fromUnicodeCurrentState;
01537         myConverterData->fromUnicodeConverter = (myConverterData->fromUnicodeConverter == NULL) ?
01538             myConverterData->myConverterArray[0] :
01539         myConverterData->myConverterArray[(int)myConverterData->fromUnicodeCurrentState];
01540         isTargetUCharDBCS   = (UBool) args->converter->fromUnicodeStatus;
01541         
01542         if(myTargetIndex < myTargetLength){
01543 
01544             mySourceChar = (UChar) args->source[mySourceIndex++];
01545             
01546             myType= (Cnv2022Type) myConverterType[currentState];
01547             
01548             /* I am handling surrogates in the begining itself so that I donot have to go through 8 
01549             * iterations on codepages that we support. 
01550             */
01551             if(UTF_IS_SURROGATE(mySourceChar)) {
01552                 if(UTF_IS_SURROGATE_FIRST(mySourceChar)) {
01553                     args->converter->fromUSurrogateLead=(UChar)mySourceChar;
01554 getTrail:
01555                     /*look ahead to find the trail surrogate*/
01556                     if(mySourceIndex <  mySourceLength) {
01557                         /* test the following code unit */
01558                         UChar trail=(UChar) args->source[mySourceIndex];
01559                         if(UTF_IS_SECOND_SURROGATE(trail)) {
01560                             ++mySourceIndex;
01561                             mySourceChar=UTF16_GET_PAIR_VALUE(mySourceChar, trail);
01562                             args->converter->fromUSurrogateLead=0x00;
01563                             /* convert this surrogate code point */
01564                             /* exit this condition tree */
01565                         } else {
01566                             /* this is an unmatched lead code unit (1st surrogate) */
01567                             /* callback(illegal) */
01568                             reason=UCNV_ILLEGAL;
01569                             *err=U_ILLEGAL_CHAR_FOUND;
01570                             goto CALLBACK;
01571                         }
01572                     } else {
01573                         /* no more input */
                              /* the lead stays in fromUSurrogateLead; it is picked
                               * up on the next call or reported as truncated after
                               * the loop when flush is set */
01574                         break;
01575                     }
01576                 } else {
01577                     /* this is an unmatched trail code unit (2nd surrogate) */
01578                     /* callback(illegal) */
01579                     reason=UCNV_ILLEGAL;
01580                     *err=U_ILLEGAL_CHAR_FOUND;
01581                     goto CALLBACK;
01582                 }
01583             }
01584             /*Do the conversion*/
                  /* U+0020: in any state numbered above 2, emit escSeqChars[0]
                   * first (presumably the ASCII designation -- confirm against
                   * the escSeqChars table), then the space itself */
01585             if(mySourceChar == 0x0020){
01586                 
01587                 if(currentState > 2){
01588                     concatEscape(args, &myTargetIndex, &myTargetLength, escSeqChars[0],err,strlen(escSeqChars[0]),&mySourceIndex);
01589                     
01590                     isTargetUCharDBCS=FALSE;
01591                 }
01592                 concatString(args, &myTargetIndex, &myTargetLength,&mySourceChar,err,&mySourceIndex);
01593                 myConverterData->isEscapeAppended=isEscapeAppended =FALSE;
                      /* TEST_ERROR_CONDITION acts only when *err is
                       * U_BUFFER_OVERFLOW_ERROR: it saves the state and, per its
                       * own comment, returns */
01594                 TEST_ERROR_CONDITION(args,myTargetIndex, mySourceIndex, isTargetUCharDBCS,myConverterData, err);
01595                 continue;
01596             }
01597             /* if the source character is CR or LF then append the ASCII escape sequence*/
                  /* (the test below also covers TAB 0x09 and VT 0x0B) */
01598             else if(mySourceChar== 0x000A || mySourceChar== 0x000D || mySourceChar==0x0009 || mySourceChar==0x000B){
01599                 
                      /* the escape is skipped when the previous code unit was CR.
                       * NOTE(review): mySource[mySourceIndex-2] indexes before the
                       * start of the buffer when this is the first code unit of
                       * the call (mySourceIndex == 1). */
01600                 if((isTargetUCharDBCS || currentState==JISX201) &&  mySource[mySourceIndex-2]!=0x000D){
01601                     concatEscape(args, &myTargetIndex, &myTargetLength, escSeqChars[0],err,strlen(escSeqChars[0]),&mySourceIndex);
01602                     isTargetUCharDBCS=FALSE;
01603                     isShiftAppended =FALSE;
01604                     myConverterData->isEscapeAppended=isEscapeAppended=FALSE;
01605                     myConverterData->isShiftAppended=FALSE;
01606                     
01607                 }
01608                 
01609                 concatString(args, &myTargetIndex, &myTargetLength,&mySourceChar,err,&mySourceIndex);
01610                 
01611                 if(currentState==ISO8859_1 || currentState ==ISO8859_7)
01612                     isEscapeAppended =FALSE;
01613                 
01614                 TEST_ERROR_CONDITION(args,myTargetIndex, mySourceIndex, isTargetUCharDBCS,myConverterData, err);
01615                 
01616                 continue;
01617             }
01618             else{
01619                 
                      /* Try the current state's table; while the character is
                       * missing, advance to the next state and try again until
                       * the walk comes back to initIterState. */
01620                 do{
01621                     switch (myType){
01622                     
01623                         case SBCS:
                                  /* 0xFFFF and above are not looked up; targetUniChar
                                   * then keeps its missingCharMarker value */
01624                             if(mySourceChar < 0xffff){
01625                                 myFromUnicodeSBCS = &myConverterData->fromUnicodeConverter->sharedData->table->sbcs.fromUnicode;
01626                                 myFromUnicodeSBCSFallback = &myConverterData->fromUnicodeConverter->sharedData->table->sbcs.fromUnicodeFallback;
01627                     
01628                                 targetUniChar = (UChar32) ucmp8_getu (myFromUnicodeSBCS, mySourceChar);
01629                     
01630                                 /* There are no fallbacks in ISO_8859_1, ISO_8859_7,JISX201 so we can
01631                                  * safely ignore the codepaths below
01632                                  */
01633                                 /*if ((targetUniChar==0) && UCNV_FROM_U_USE_FALLBACK(args->converter, mySourceChar) &&
01634                                     (myConverterData->fromUnicodeConverter->sharedData->staticData->hasFromUnicodeFallback == TRUE)){
01635                                     targetUniChar = (UChar32) ucmp8_getu (myFromUnicodeSBCSFallback, mySourceChar);
01636                                 } */
01637 
01638                                 /* ucmp8_getU returns 0 for missing char so explicitly set it missingCharMarker*/
01639                                 targetUniChar=(UChar)((targetUniChar==0) ? (UChar) missingCharMarker : targetUniChar);
01640                             }
01641                             break;
01642                     
01643                         case DBCS:
01644                             if(mySourceChar < 0xffff){
01645                                 myFromUnicodeDBCS = &myConverterData->fromUnicodeConverter->sharedData->table->dbcs.fromUnicode;
01646                                 myFromUnicodeDBCSFallback = &myConverterData->fromUnicodeConverter->sharedData->table->dbcs.fromUnicodeFallback;
01647                                 targetUniChar = (UChar) ucmp16_getu (myFromUnicodeDBCS, mySourceChar);
01648                                 /* There are no fallbacks in JISX208,JISX212, KSC5601,GB2312 so we can
01649                                  * safely ignore the codepaths below
01650                                  */
01651                                 /*
01652                                 if ((targetUniChar==missingCharMarker) && UCNV_FROM_U_USE_FALLBACK(args->converter, mySourceChar) &&
01653                                     (myConverterData->fromUnicodeConverter->sharedData->staticData->hasFromUnicodeFallback == TRUE)){
01654                                     targetUniChar = (UChar) ucmp16_getu (myFromUnicodeDBCSFallback, mySourceChar);
01655                                 }
01656                                 */
01657                             }
01658                             break;
01659 
01660                         case LATIN1:
                                  /* identity mapping for code points below 0x0100 */
01661                             if(mySourceChar < 0x0100){
01662                                 targetUniChar = mySourceChar;
01663                             } 
01664                             else 
01665                                 targetUniChar = missingCharMarker;
01666                             break;
01667 
01668                         case ASCII1:
                                  /* identity mapping for code points below 0x7f
                                   * (0x7f itself is treated as missing) */
01669                             if(mySourceChar < 0x7f){
01670                                 targetUniChar = mySourceChar;
01671                             }
01672                             else 
01673                                 targetUniChar = missingCharMarker;
01674                             break;
01675                         default:
01676                             /*not expected */ 
01677                             break;
01678                     }
01679                     if(targetUniChar==missingCharMarker){
                              /* not representable in this state: switch to the next
                               * state for this converter version; a new escape
                               * sequence will be needed */
01680                         isEscapeAppended = FALSE; 
01681                         /* save the state */
01682                         myConverterData->fromUnicodeCurrentState=nextStateArray[myConverterData->version][currentState];
01683                         myConverterData->isEscapeAppended = isEscapeAppended;
01684                         myConverterData->isShiftAppended =isShiftAppended;
01685                         args->converter->fromUnicodeStatus = isTargetUCharDBCS;
01686                         currentState = myConverterData->fromUnicodeCurrentState;
01687                         myConverterData->fromUnicodeConverter = (myConverterData->fromUnicodeConverter == NULL) ?
01688                         myConverterData->myConverterArray[0] :
01689                                         myConverterData->myConverterArray[(int)myConverterData->fromUnicodeCurrentState];
01690                         isTargetUCharDBCS   = (UBool) args->converter->fromUnicodeStatus;
01691                         myType= (Cnv2022Type) myConverterType[currentState];
01692                     }
01693 
01694                 }while(targetUniChar==missingCharMarker && initIterState != currentState);
01695 
01696             }
01697             
01698             if(targetUniChar!= missingCharMarker){
01699                 
                      /* a mapping was found: values above 0xFF are double-byte */
01700                 oldIsTargetUCharDBCS = isTargetUCharDBCS;
01701                 isTargetUCharDBCS =(UBool) (targetUniChar >0x00FF);
01702                 args->converter->fromUnicodeStatus= isTargetUCharDBCS;
01703                 /* set the iteration state and iteration count  */
01704                 initIterState = currentState;
01705                 iterCount =0;
01706                 /* Append the escpace sequence */
01707                 if(!isEscapeAppended){
01708                     escSeq = escSeqChars[(int)currentState];
01709                     concatEscape(args, &myTargetIndex, &myTargetLength, 
01710                         escSeqChars[(int)currentState],
01711                         err,strlen(escSeqChars[(int)currentState]),&mySourceIndex);
01712                     
01713                     isEscapeAppended =TRUE;
01714                     myConverterData->isEscapeAppended=TRUE;
01715                     
01716                     /* Append SSN for shifting to G2 */
01717                     if(currentState==ISO8859_1 || currentState==ISO8859_7){
01718 
01719                         concatEscape(args, &myTargetIndex, &myTargetLength,
01720                             UCNV_SS2,err,strlen(UCNV_SS2),&mySourceIndex);
01721                     }
01722                 }
01723 
01724                 concatString(args, &myTargetIndex, &myTargetLength,
01725                     &targetUniChar,err, &mySourceIndex);
01726                 
01727                 TEST_ERROR_CONDITION(args,myTargetIndex, mySourceIndex, isTargetUCharDBCS,myConverterData, err);
01728                 
01729             }/* end of end if(targetUniChar==missingCharMarker)*/
01730             else{
01731                 
01732                 /* if we cannot find the character after checking all codepages 
01733                  * then this is an error
01734                  */
01735                     reason = UCNV_UNASSIGNED;
01736                     *err = U_INVALID_CHAR_FOUND;
01737                     
                          /* Common error path, also reached by goto for unmatched
                           * surrogates: record the offending unit, point args at the
                           * current positions, run the callback, then restore args. */
01738 CALLBACK:
01739                     args->converter->invalidUCharBuffer[args->converter->invalidUCharLength++]=(UChar)mySourceChar;
                          /* NOTE(review): args->offsets is read here without a NULL
                           * check (the adjustment further down does check it), and
                           * the index is -1 when nothing has been written yet.
                           * currentOffset is not read anywhere else in this function
                           * text; presumably the callback macro uses it -- confirm. */
01740                     currentOffset = args->offsets[myTargetIndex-1]+1;
01741                     saveSource = args->source;
01742                     saveTarget = args->target;
01743                     saveOffsets = args->offsets;
01744                     args->target = (char*)myTarget + myTargetIndex;
01745                     args->source = mySource + mySourceIndex;
01746 
01747                     myConverterData->isEscapeAppended = isEscapeAppended;
01748                     myConverterData->isShiftAppended =isShiftAppended;
01749                     args->converter->fromUnicodeStatus = isTargetUCharDBCS;
01750                     myConverterData->sourceIndex = mySourceIndex;
01751                     myConverterData->targetIndex = myTargetIndex;
01752                                
                          /* (repeats the fromUnicodeStatus assignment made just above) */
01753                     args->converter->fromUnicodeStatus = (int32_t)isTargetUCharDBCS;
01754 
01755                     args->offsets = args->offsets?args->offsets+myTargetIndex:0;
                          /* the code point passed is the combined pair when two units
                           * are buffered, otherwise the single buffered unit */
01756                     FromU_CALLBACK_OFFSETS_LOGIC_MACRO(args->converter->fromUContext,
01757                         args,
01758                         args->converter->invalidUCharBuffer,
01759                         args->converter->invalidUCharLength,
01760                         (UChar32) (args->converter->invalidUCharLength == 2 ? 
01761                         UTF16_GET_PAIR_VALUE(args->converter->invalidUCharBuffer[0], 
01762                         args->converter->invalidUCharBuffer[1]) 
01763                         : args->converter->invalidUCharBuffer[0]),
01764                         reason,
01765                         err);
01766              
                          /* restore the caller's pointers and reload the state the
                           * callback may have changed */
01767                     args->source=saveSource;
01768                     args->target=saveTarget;
01769                     args->offsets=saveOffsets;
01770                     initIterState = myConverterData->fromUnicodeCurrentState;
01771                     isTargetUCharDBCS  = (UBool)(args->converter->fromUnicodeStatus);
01772                     myConverterData->isEscapeAppended = isEscapeAppended=FALSE;
01773                     args->converter->invalidUCharLength = 0;
01774                     args->converter->fromUSurrogateLead=0x00;
01775                     if (U_FAILURE (*err)){
01776                         break;
01777                     }
01778                     
01779             }
                  /* reset for the next code unit */
01780             targetUniChar =missingCharMarker;
01781         } /* end if(myTargetIndex<myTargetLength) */
01782         else{
01783             *err =U_BUFFER_OVERFLOW_ERROR;
01784             break;
01785         } 
01786         
01787     }/* end while(mySourceIndex<mySourceLength) */
01788     /*If at the end of conversion we are still carrying state information
01789     *flush is TRUE, we can deduce that the input stream is truncated
01790     */
01791     if (args->converter->fromUSurrogateLead !=0 && (mySourceIndex == mySourceLength) && args->flush){
01792         if (U_SUCCESS(*err)){
01793             *err = U_TRUNCATED_CHAR_FOUND;
                  /* NOTE(review): this clears toUnicodeStatus inside a
                   * from-Unicode routine -- confirm it is intended. */
01794             args->converter->toUnicodeStatus = 0x00;
01795         }
01796     }
01797     /* Reset the state of converter if we consumed 
01798      * the source and flush is true
01799      */
01800     if( (mySourceIndex == mySourceLength) && args->flush){
01801         setInitialStateFromUnicodeJPCN(myConverterData);
01802     }
01803 
01804     /*save the state and return */
01805     args->target += myTargetIndex;
01806     args->source += mySourceIndex;
01807     myConverterData->sourceIndex = 0;
01808     myConverterData->targetIndex = 0;
01809     args->converter->fromUnicodeStatus = isTargetUCharDBCS;
01810     
01811 }
01812 
01813 static void concatString(UConverterFromUnicodeArgs* args, int32_t *targetIndex, int32_t *targetLength,
01814                          const UChar32* strToAppend,UErrorCode* err, int32_t *sourceIndex){
01815     
01816     if(*strToAppend < 0x00FF){
01817         if( (*targetIndex)+1 >= *targetLength){
01818             args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (unsigned char) *strToAppend;
01819             *err = U_BUFFER_OVERFLOW_ERROR;
01820         }else{
01821             args->target[*targetIndex] = (unsigned char) *strToAppend;
01822             
01823             if(args->offsets!=NULL){
01824                 args->offsets[*targetIndex] = *sourceIndex-1;
01825             }
01826             (*targetIndex)++;
01827             
01828         }
01829     }
01830     else{
01831         if(*targetIndex < *targetLength){
01832             args->target[*targetIndex] =(unsigned char) (*strToAppend>>8);
01833             if(args->offsets!=NULL){
01834                 args->offsets[*targetIndex] = *sourceIndex-1;
01835             }
01836             (*targetIndex)++;
01837             
01838             if(*targetIndex < *targetLength){
01839                 args->target[(*targetIndex)] =(unsigned char) (*strToAppend & 0x00FF);
01840                 
01841                 if(args->offsets!=NULL){
01842                     args->offsets[*targetIndex] = *sourceIndex-1;
01843                 }
01844                 (*targetIndex)++;
01845             }
01846             else{
01847                 args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (unsigned char) (*strToAppend & 0x00FF);
01848                 *err = U_BUFFER_OVERFLOW_ERROR;
01849                 
01850             }
01851             
01852         }
01853         else{
01854             args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (unsigned char) (*strToAppend>>8);
01855             args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (unsigned char) (*strToAppend & 0x00FF);
01856             *err = U_BUFFER_OVERFLOW_ERROR;
01857             if(args->offsets!=NULL){
01858                 args->offsets[*targetIndex] = *sourceIndex-1;
01859                 
01860             }
01861         }
01862     }
01863     
01864 }
01865 
01866 
01867 static void concatEscape(UConverterFromUnicodeArgs* args, int32_t *targetIndex, int32_t *targetLength,
01868                          const char* strToAppend,UErrorCode* err,int len,int32_t *sourceIndex){
01869     while(len-->0){
01870         if(*targetIndex < *targetLength){
01871             args->target[*targetIndex] = (unsigned char) *strToAppend;
01872             if(args->offsets!=NULL){
01873                 args->offsets[*targetIndex] = *sourceIndex-1;
01874             }
01875             (*targetIndex)++;
01876         }
01877         else{
01878             args->converter->charErrorBuffer[(int)args->converter->charErrorBufferLength++] = (unsigned char) *strToAppend;
01879             *err =U_BUFFER_OVERFLOW_ERROR;
01880         }
01881         strToAppend++;
01882     }
01883 }
01884 
01885 static void concatChar(UConverterFromUnicodeArgs* args, int32_t *targetIndex, int32_t *targetLength,
01886                        int8_t charToAppend,UErrorCode* err,int32_t *sourceIndex){
01887     if( *targetIndex < *targetLength){
01888         args->target[(*targetIndex)++] = (unsigned char) charToAppend;
01889         if(args->offsets!=NULL){
01890                 args->offsets[*targetIndex] = *sourceIndex-1;
01891         }
01892     }else{
01893         args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (unsigned char) charToAppend;
01894         *err = U_BUFFER_OVERFLOW_ERROR;
01895     }
01896 }
01897 
01898 /*************** to unicode *******************/
01899 
01900 /*
01901 * This is a simple, interim implementation of GetNextUChar()
01902 * that lets us concentrate on testing one single implementation
01903 * of the ToUnicode conversion before it gets copied to
01904 * multiple versions that are then optimized for their needs
01905 * (with vs. without offsets and getNextUChar).
01906 */
01907 
/*
 * Decodes the next code point from pArgs->source by driving
 * UConverter_toUnicode_ISO_2022_JP() one source byte at a time.
 *
 * pArgs       in/out: source is advanced by the bytes consumed. target,
 *             targetLimit, sourceLimit and flush are overwritten here and are
 *             not restored before returning (target is left pointing into the
 *             local buffer) -- presumably callers treat pArgs as scratch;
 *             confirm against the caller.
 * pErrorCode  in/out: set to U_INDEX_OUTOFBOUNDS_ERROR when the input is used
 *             up without any output; U_BUFFER_OVERFLOW_ERROR from the
 *             converter is cleared once output was produced; any other
 *             failure from the converter is left in place.
 *
 * Returns the value of ucnv_getUChar32KeepOverflow() on the produced UChars,
 * or 0xffff on error / no output.
 */
01908 U_CFUNC UChar32
01909 UConverter_getNextUChar_ISO_2022_JP(UConverterToUnicodeArgs *pArgs,
01910                                     UErrorCode *pErrorCode) {
01911     UChar buffer[UTF_MAX_CHAR_LENGTH];
01912     const char *realLimit=pArgs->sourceLimit;
01913     
    /* redirect the converter's output into the small local buffer */
01914     pArgs->target=buffer;
01915     pArgs->targetLimit=buffer+UTF_MAX_CHAR_LENGTH;
01916     
01917     while(pArgs->source<realLimit) {
01918         /* feed in one byte at a time to make sure to get only one character out */
01919         pArgs->sourceLimit=pArgs->source+1;
        /* flush only when the byte being fed is the last one of the real input */
01920         pArgs->flush= (UBool)(pArgs->sourceLimit==realLimit);
01921         UConverter_toUnicode_ISO_2022_JP(pArgs, pErrorCode);
01922         if(U_FAILURE(*pErrorCode) && *pErrorCode!=U_BUFFER_OVERFLOW_ERROR) {
01923             return 0xffff;
01924         } else if(pArgs->target!=buffer) {
            /* something was written: an overflow of the local buffer is not reported as an error */
01925             if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
01926                 *pErrorCode=U_ZERO_ERROR;
01927             }
01928             return ucnv_getUChar32KeepOverflow(pArgs->converter, buffer, pArgs->target-buffer);
01929         }
01930     }
01931     
01932     /* no output because of empty input or only state changes and skipping callbacks */
01933     *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
01934     return 0xffff;
01935 }
01936 
01937 /****************************************************************************
01938  * Recognized escape sequences are
01939  * <ESC>(B  ASCII      
01940  * <ESC>.A  ISO-8859-1 
01941  * <ESC>.F  ISO-8859-7 
01942  * <ESC>(J  JISX-201
01943  * <ESC>(I  JISX-201 
01944  * <ESC>$B  JISX-208
01945  * <ESC>$@  JISX-208
01946  * <ESC>$(D JISX-212   
01947  * <ESC>$A  GB2312     
01948  * <ESC>$(C KSC5601
01949  */
/*
 * State table for the toUnicode direction: three rows of MAX_STATES_2022
 * StateEnum values. Presumably one row per ISO-2022-JP variant and indexed by
 * an escape-sequence slot computed elsewhere -- the code that reads this table
 * is not visible here; TODO confirm.
 *
 * Each row is written ten entries per line (see the 0..9 column ruler), so
 * slot = 10 * line + column. Non-INVALID_STATE slots:
 *   row 0: 9 ASCII1, 16 and 17 JISX201, 21 and 23 JISX208
 *   row 1: same as row 0, plus 36 JISX212
 *   row 2: same as row 1, plus 22 GB2312, 29 ISO8859_1, 30 ISO8859_7, 35 KSC5601
 */
01950 static StateEnum nextStateToUnicodeJP[3][MAX_STATES_2022]= {
01951 
01952     {
01953 /*      0                       1                    2                      3                   4               5                           6                          7                    8                   9    */
01954         
01955         INVALID_STATE           ,INVALID_STATE      ,INVALID_STATE      ,INVALID_STATE      ,INVALID_STATE      ,INVALID_STATE          ,INVALID_STATE          ,INVALID_STATE          ,INVALID_STATE          ,ASCII1             
01956         ,INVALID_STATE          ,INVALID_STATE      ,INVALID_STATE      ,INVALID_STATE      ,INVALID_STATE      ,INVALID_STATE          ,JISX201                ,JISX201                ,INVALID_STATE          ,INVALID_STATE          
01957         ,INVALID_STATE          ,JISX208            ,INVALID_STATE      ,JISX208            ,INVALID_STATE      ,INVALID_STATE          ,INVALID_STATE          ,INVALID_STATE          ,INVALID_STATE          ,INVALID_STATE          
01958         ,INVALID_STATE          ,INVALID_STATE      ,INVALID_STATE      ,INVALID_STATE      ,INVALID_STATE      ,INVALID_STATE          ,INVALID_STATE          ,INVALID_STATE          ,INVALID_STATE          ,INVALID_STATE          
01959         ,INVALID_STATE          ,INVALID_STATE      ,INVALID_STATE      ,INVALID_STATE      ,INVALID_STATE      ,INVALID_STATE          ,INVALID_STATE          ,INVALID_STATE          ,INVALID_STATE          ,INVALID_STATE     
01960         ,INVALID_STATE          ,INVALID_STATE      ,INVALID_STATE      ,INVALID_STATE      ,INVALID_STATE      ,INVALID_STATE          ,INVALID_STATE          ,INVALID_STATE          ,INVALID_STATE          ,INVALID_STATE
01961         ,INVALID_STATE          ,INVALID_STATE      ,INVALID_STATE      ,INVALID_STATE      ,INVALID_STATE      ,INVALID_STATE          ,INVALID_STATE          ,INVALID_STATE          ,INVALID_STATE          ,INVALID_STATE              
01962         ,INVALID_STATE          ,INVALID_STATE
01963     },
01964 
01965     {
01966 /*      0                       1                    2                      3                   4               5                           6                          7                    8                   9    */
01967         
01968         INVALID_STATE           ,INVALID_STATE      ,INVALID_STATE      ,INVALID_STATE      ,INVALID_STATE      ,INVALID_STATE          ,INVALID_STATE          ,INVALID_STATE          ,INVALID_STATE          ,ASCII1             
01969         ,INVALID_STATE          ,INVALID_STATE      ,INVALID_STATE      ,INVALID_STATE      ,INVALID_STATE      ,INVALID_STATE          ,JISX201                ,JISX201                ,INVALID_STATE          ,INVALID_STATE          
01970         ,INVALID_STATE          ,JISX208            ,INVALID_STATE      ,JISX208            ,INVALID_STATE      ,INVALID_STATE          ,INVALID_STATE          ,INVALID_STATE          ,INVALID_STATE          ,INVALID_STATE          
01971         ,INVALID_STATE          ,INVALID_STATE      ,INVALID_STATE      ,INVALID_STATE      ,INVALID_STATE      ,INVALID_STATE          ,JISX212                ,INVALID_STATE          ,INVALID_STATE          ,INVALID_STATE          
01972         ,INVALID_STATE          ,INVALID_STATE      ,INVALID_STATE      ,INVALID_STATE      ,INVALID_STATE      ,INVALID_STATE          ,INVALID_STATE          ,INVALID_STATE          ,INVALID_STATE          ,INVALID_STATE     
01973         ,INVALID_STATE          ,INVALID_STATE      ,INVALID_STATE      ,INVALID_STATE      ,INVALID_STATE      ,INVALID_STATE          ,INVALID_STATE          ,INVALID_STATE          ,INVALID_STATE          ,INVALID_STATE
01974         ,INVALID_STATE          ,INVALID_STATE      ,INVALID_STATE      ,INVALID_STATE      ,INVALID_STATE      ,INVALID_STATE          ,INVALID_STATE          ,INVALID_STATE          ,INVALID_STATE          ,INVALID_STATE              
01975         ,INVALID_STATE          ,INVALID_STATE
01976     },
01977 
01978     {
01979 /*      0                       1                    2                      3                   4               5                           6                          7                    8                   9    */
01980         
01981         INVALID_STATE           ,INVALID_STATE      ,INVALID_STATE      ,INVALID_STATE      ,INVALID_STATE      ,INVALID_STATE          ,INVALID_STATE          ,INVALID_STATE          ,INVALID_STATE          ,ASCII1             
01982         ,INVALID_STATE          ,INVALID_STATE      ,INVALID_STATE      ,INVALID_STATE      ,INVALID_STATE      ,INVALID_STATE          ,JISX201                ,JISX201                ,INVALID_STATE          ,INVALID_STATE          
01983         ,INVALID_STATE          ,JISX208            ,GB2312             ,JISX208            ,INVALID_STATE      ,INVALID_STATE          ,INVALID_STATE          ,INVALID_STATE          ,INVALID_STATE          ,ISO8859_1          
01984         ,ISO8859_7              ,INVALID_STATE      ,INVALID_STATE      ,INVALID_STATE      ,INVALID_STATE      ,KSC5601                ,JISX212                ,INVALID_STATE          ,INVALID_STATE          ,INVALID_STATE          
01985         ,INVALID_STATE          ,INVALID_STATE      ,INVALID_STATE      ,INVALID_STATE      ,INVALID_STATE      ,INVALID_STATE          ,INVALID_STATE          ,INVALID_STATE          ,INVALID_STATE          ,INVALID_STATE     
01986         ,INVALID_STATE          ,INVALID_STATE      ,INVALID_STATE      ,INVALID_STATE      ,INVALID_STATE      ,INVALID_STATE          ,INVALID_STATE          ,INVALID_STATE          ,INVALID_STATE          ,INVALID_STATE
01987         ,INVALID_STATE          ,INVALID_STATE      ,INVALID_STATE      ,INVALID_STATE      ,INVALID_STATE      ,INVALID_STATE          ,INVALID_STATE          ,INVALID_STATE          ,INVALID_STATE          ,INVALID_STATE              
01988         ,INVALID_STATE          ,INVALID_STATE
01989     }
01990 };
01991 
01992 
01993 U_CFUNC void UConverter_toUnicode_ISO_2022_JP(UConverterToUnicodeArgs *args,
01994                                               UErrorCode* err){
01995     char tempBuf[2] ;
01996     const char* pBuf;
01997     const char *mySource = ( char *) args->source;
01998     UChar *myTarget = args->target;
01999     int32_t mySourceIndex = 0;
02000     int32_t myTargetIndex = 0;
02001     const char *mySourceLimit = args->sourceLimit;
02002     UChar32 targetUniChar = 0x0000;
02003     UChar mySourceChar = 0x0000;
02004     UConverterDataISO2022* myData=(UConverterDataISO2022*)(args->converter->extraInfo);
02005     CompactShortArray *myToUnicodeDBCS=NULL, *myToUnicodeFallbackDBCS = NULL; 
02006     UChar *myToUnicodeSBCS = NULL, *myToUnicodeFallbackSBCS = NULL;
02007     int plane=0; /*dummy variable*/
02008     pBuf = &tempBuf[0];
02009     /*Arguments Check*/
02010     if (U_FAILURE(*err)) 
02011         return;
02012     
02013     if ((args->converter == NULL) || (args->targetLimit < args->target) || (args->sourceLimit < args->source)){
02014         *err = U_ILLEGAL_ARGUMENT_ERROR;
02015         return;
02016     }
02017     
02018     while(mySource< args->sourceLimit){
02019         
02020         if(myTarget < args->targetLimit){
02021             
02022             mySourceChar= (unsigned char) *mySource++;
02023             
02024             if(args->converter->mode==UCNV_SI){
02025                     
02026                /* if there are no escape sequences in the first buffer then they
02027                 * are assumed to be ASCII according to RFC-1554
02028                 */    
02029                 myData->toUnicodeCurrentState = ASCII;
02030              }
02031             
02032             switch(mySourceChar){
02033                 case 0x0A:
02034                     if(args->converter->toUnicodeStatus != 0x00){
02035                         goto SAVE_STATE;
02036                     }
02037                      myData->toUnicodeCurrentState = ASCII; 
02038                     break;
02039                 
02040                 case 0x0D:
02041                     if(args->converter->toUnicodeStatus != 0x00){
02042                         goto SAVE_STATE;
02043                     }
02044                      myData->toUnicodeCurrentState = ASCII;
02045 
02046                     break;
02047                                 
02048                 case 0x20:
02049                     if(args->converter->toUnicodeStatus != 0x00){
02050                         goto SAVE_STATE;
02051                     }
02052                     myData->toUnicodeCurrentState = ASCII;
02053 
02054                     break;
02055                             
02056                 default:
02057                     /* if we are in the middle of consuming an escape sequence 
02058                      * we continue to next switch tag else we break
02059                      */
02060                     if(myData->key==0){
02061                         break;
02062                     }
02063 
02064                 case ESC_2022:
02065                     if(args->converter->toUnicodeStatus != 0x00){
02066                         goto SAVE_STATE;
02067                     }
02068                     mySource--;
02069                     changeState_2022(args->converter,&(mySource), 
02070                         args->sourceLimit, args->flush,ISO_2022_JP,&plane, err);
02071                     if(U_FAILURE(*err)){
02072                         return;
02073                     }
02074                     continue;
02075             }
02076             
02077             switch(myConverterType[myData->toUnicodeCurrentState]){
02078                 
02079                  case ASCII1:
02080                     if(args->converter->toUnicodeStatus == 0x00 && mySourceChar < 0x7F){
02081                         targetUniChar = (UChar) mySourceChar;
02082                     }
02083                     else{
02084                         goto SAVE_STATE;
02085                     }
02086                     break;
02087                 case SBCS:
02088                     if(args->converter->toUnicodeStatus == 0x00){
02089                         myToUnicodeSBCS = myData->currentConverter->sharedData->table->sbcs.toUnicode;
02090                         myToUnicodeFallbackSBCS = myData->currentConverter->sharedData->table->sbcs.toUnicodeFallback;
02091                         targetUniChar = myToUnicodeSBCS[(unsigned char) mySourceChar];
02092                         /* There are no fallbacks in ISO_8859_1, ISO_8859_7,JISX201 so we can
02093                          * safely ignore the codepaths below
02094                          */
02095                         /*
02096                         if(targetUniChar> 0xfffe){
02097                             if(UCNV_TO_U_USE_FALLBACK(args->converter) && 
02098                                 (myData->currentConverter->sharedData->staticData->hasFromUnicodeFallback == TRUE)){
02099                             
02100                                 targetUniChar = myToUnicodeFallbackSBCS[(unsigned char) mySource[mySourceIndex-1]];
02101                             }
02102                         }
02103                         */
02104                         
02105                     }
02106                     else{
02107                         goto SAVE_STATE;
02108                     }
02109                     break;
02110                 
02111                 case DBCS:
02112                     myToUnicodeDBCS = &myData->currentConverter->sharedData->table->dbcs.toUnicode;
02113                     myToUnicodeFallbackDBCS = &myData->currentConverter->sharedData->table->dbcs.toUnicodeFallback;
02114                 
02115                     if(args->converter->toUnicodeStatus == 0x00){
02116                         args->converter->toUnicodeStatus = (UChar) mySourceChar;
02117                         continue;
02118                     }
02119                     else{
02120                         tempBuf[0] = (char) args->converter->toUnicodeStatus ;
02121                         tempBuf[1] = (char) mySourceChar;
02122                         mySourceChar = (UChar)((args->converter->toUnicodeStatus << 8) | (mySourceChar & 0x00ff));
02123                         args->converter->toUnicodeStatus =0x00;
02124                     
02125                         targetUniChar = ucmp16_getu(myToUnicodeDBCS,mySourceChar);
02126                         /* There are no fallbacks in JISX208,JISX212, KSC5601,GB2312 so we can
02127                          * safely ignore the codepaths below
02128                          */
02129                         /*
02130                         if(targetUniChar> 0xfffe){
02131                             if(UCNV_TO_U_USE_FALLBACK(args->converter) && 
02132                                 (myData->currentConverter->sharedData->staticData->hasFromUnicodeFallback == TRUE)){
02133                             
02134                                 targetUniChar = (UChar) ucmp16_getu(myToUnicodeFallbackDBCS, mySourceChar);
02135                             }
02136                         }
02137                         */
02138                     }
02139                 
02140                     break;
02141                     
02142                 case LATIN1:
02143                     if(args->converter->fromUnicodeStatus == 0x00 && mySourceChar < 0x100){
02144                         targetUniChar = (UChar) mySourceChar;
02145                     }
02146                     else{
02147                         goto SAVE_STATE;
02148                     }
02149                     break;
02150                 
02151                 case INVALID_STATE:
02152                     *err = U_ILLEGAL_ESCAPE_SEQUENCE;
02153                     return;
02154                 
02155                 default :
02156                     break;
02157                     /*not expected*/
02158 
02159             }
02160             if(targetUniChar < 0xfffe){
02161                 *(myTarget++)=(UChar)targetUniChar;
02162                 targetUniChar=missingCharMarker;
02163             }
02164             else if(targetUniChar>=0xfffe){
02165 SAVE_STATE:
02166                 {
02167                     const char *saveSource = args->source;
02168                     UChar *saveTarget = args->target;
02169                     UConverterCallbackReason reason;
02170                 
02171                     if(targetUniChar == 0xfffe){
02172                         reason = UCNV_UNASSIGNED;
02173                         *err = U_INVALID_CHAR_FOUND;
02174                     }
02175                     else{
02176                         reason = UCNV_ILLEGAL;
02177                         *err = U_ILLEGAL_CHAR_FOUND;
02178                     }
02179                     
02180                     if(myConverterType[myData->toUnicodeCurrentState] > SBCS){
02181                         
02182                         args->converter->invalidCharBuffer[args->converter->invalidCharLength++] = tempBuf[0];
02183                         args->converter->invalidCharBuffer[args->converter->invalidCharLength++] = tempBuf[1];
02184                     }
02185                     else{
02186                         args->converter->invalidCharBuffer[args->converter->invalidCharLength++] = (char) mySourceChar;
02187                     }
02188 
02189                     args->target = myTarget;
02190                     args->source = mySource;
02191                     ToU_CALLBACK_MACRO( args->converter->toUContext,
02192                         args,
02193                         args->converter->invalidCharBuffer,
02194                         args->converter->invalidCharLength,
02195                         reason,
02196                         err);
02197                     myTarget += args->target - myTarget;
02198                     args->source = saveSource;
02199                     args->target = saveTarget;
02200                     args->converter->invalidCharLength=0;
02201                     if(U_FAILURE(*err))
02202                         break;
02203 
02204                 }
02205             }
02206         }
02207         else{
02208             *err =U_BUFFER_OVERFLOW_ERROR;
02209             break;
02210         }
02211     }
02212     if((args->flush==TRUE)
02213         && (mySource == mySourceLimit) 
02214         && ( args->converter->toUnicodeStatus !=0x00)){
02215         if(U_SUCCESS(*err)){
02216             *err = U_TRUNCATED_CHAR_FOUND;
02217             args->converter->toUnicodeStatus = 0x00;
02218         }
02219     }
02220     /* Reset the state of converter if we consumed 
02221      * the source and flush is true
02222      */
02223     if( (mySource == mySourceLimit) && args->flush){
02224         setInitialStateToUnicodeJPCN(args->converter,myData);
02225     }
02226 
02227     args->target = myTarget;
02228     args->source = mySource;
02229 }
02230 
/*
 * Same conversion as UConverter_toUnicode_ISO_2022_JP(), but additionally
 * records in args->offsets, for every UChar written, the index (relative to
 * args->source) of the first source byte it came from: two bytes back for
 * converter types ordered after SBCS, one byte back otherwise.
 *
 * Per source byte:
 *  - 0x0A, 0x0D and 0x20 switch toUnicodeCurrentState back to ASCII and are
 *    then decoded like any other byte;
 *  - ESC_2022, or any byte arriving while myData->key != 0, is handed to
 *    changeState_2022();
 *  - everything else is decoded according to
 *    myConverterType[myData->toUnicodeCurrentState].
 * The lead byte of a double-byte character is parked in
 * converter->toUnicodeStatus until its trail byte arrives.
 *
 * args  in/out: source and target are advanced on the normal exit path. The
 *       early returns (failed changeState_2022(), INVALID_STATE) leave them
 *       untouched. args->offsets is written without a NULL check on the
 *       success path -- presumably this variant is only selected when offsets
 *       were requested; confirm against the caller.
 * err   in/out: nothing is done if it already holds a failure. Set to
 *       U_ILLEGAL_ARGUMENT_ERROR for a NULL converter or reversed limits,
 *       U_BUFFER_OVERFLOW_ERROR when the target is full while source bytes
 *       remain, U_ILLEGAL_ESCAPE_SEQUENCE for an INVALID_STATE converter type,
 *       U_INVALID_CHAR_FOUND / U_ILLEGAL_CHAR_FOUND before the callback runs,
 *       U_TRUNCATED_CHAR_FOUND when flushing with a lead byte still pending.
 *
 * NOTE(review): saveOffsets, My_i and currentOffset are not used by any
 * statement visible here; presumably ToU_CALLBACK_OFFSETS_LOGIC_MACRO refers
 * to them (and restores args->offsets) -- confirm before touching them.
 * NOTE(review): SAVE_STATE can be reached by goto from the first switch while
 * the current state is double-byte; tempBuf has not necessarily been written
 * in this call at that point.
 * NOTE(review): the ASCII1 case rejects 0x7F (test is "< 0x7F") -- confirm
 * whether DEL is meant to be illegal.
 */
02231 U_CFUNC void UConverter_toUnicode_ISO_2022_JP_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,
02232                                                             UErrorCode* err){
02233     char tempBuf[2];
02234     const char *mySource = ( char *) args->source;
02235     UChar *myTarget = args->target; 
02236     int32_t mySourceIndex = 0;
02237     int32_t myTargetIndex = 0;
02238     const char *mySourceLimit = args->sourceLimit;
02239     UChar32 targetUniChar = 0x0000;
02240     UChar mySourceChar = 0x0000;
02241     UConverterDataISO2022* myData=(UConverterDataISO2022*)(args->converter->extraInfo);
02242     CompactShortArray *myToUnicodeDBCS=NULL, *myToUnicodeFallbackDBCS = NULL; 
02243     UChar *myToUnicodeSBCS = NULL, *myToUnicodeFallbackSBCS = NULL;
02244     int plane = 0; /*dummy variable*/
02245     /*Arguments Check*/
02246     if (U_FAILURE(*err)) 
02247         return;
02248     
02249     if ((args->converter == NULL) || (args->targetLimit < args->target) || (args->sourceLimit < args->source)){
02250         *err = U_ILLEGAL_ARGUMENT_ERROR;
02251         return;
02252     }
02253     
02254     while(mySource< args->sourceLimit){
02255         
02256         if(myTarget < args->targetLimit){
02257             
02258             mySourceChar= (unsigned char) *mySource++;
02259             
02260             if(args->converter->mode==UCNV_SI){
02261                     
02262                /* if there are no escape sequences in the first buffer then they
02263                 * are assumed to be ASCII according to RFC-1554
02264                 */    
02265                 myData->toUnicodeCurrentState = ASCII;
02266              }
02267             
            /* First switch: bytes that reset the state or belong to an escape sequence.
             * A pending DBCS lead byte (toUnicodeStatus != 0) makes any of them an error.
             */
02268             switch(mySourceChar){
02269                 case 0x0A:
02270                     if(args->converter->toUnicodeStatus != 0x00){
02271                         goto SAVE_STATE;
02272                     }
02273                      myData->toUnicodeCurrentState = ASCII; 
02274                     break;
02275                 
02276                 case 0x0D:
02277                     if(args->converter->toUnicodeStatus != 0x00){
02278                         goto SAVE_STATE;
02279                     }
02280                     myData->toUnicodeCurrentState = ASCII;
02281 
02282                     break;
02283 
02284                 case 0x20:
02285                     if(args->converter->toUnicodeStatus != 0x00){
02286                         goto SAVE_STATE;
02287                     }
02288                     myData->toUnicodeCurrentState = ASCII; 
02289                     break;
02290                             
02291                 default:
02292                     /* if we are in the middle of consuming an escape sequence 
02293                      * we continue to next switch tag else we break
02294                      */
02295                     if(myData->key==0){
02296                         break;
02297                     }
                    /* deliberate fall through into ESC_2022 */
02298 
02299                 case ESC_2022:
02300                     if(args->converter->toUnicodeStatus != 0x00){
02301                         goto SAVE_STATE;
02302                     }
                    /* un-read the byte so that changeState_2022() sees it */
02303                     mySource--;
02304                     changeState_2022(args->converter,&(mySource), 
02305                         args->sourceLimit, args->flush,ISO_2022_JP,&plane, err);
02306 
02307                     if(U_FAILURE(*err)){
02308                         return;
02309                     }
02310                     continue;
02311             }
02312             
            /* Second switch: decode the byte according to the current converter type. */
02313             switch(myConverterType[myData->toUnicodeCurrentState]){
02314                 
02315                  case ASCII1:
02316                     if(args->converter->toUnicodeStatus == 0x00 && mySourceChar < 0x7F){
02317                         targetUniChar = (UChar) mySourceChar;
02318                     }
02319                     else{
02320                         goto SAVE_STATE;
02321                     }
02322                     break;
02323                 case SBCS:
02324                     if(args->converter->toUnicodeStatus == 0x00){
02325                         myToUnicodeSBCS = myData->currentConverter->sharedData->table->sbcs.toUnicode;
02326                         myToUnicodeFallbackSBCS = myData->currentConverter->sharedData->table->sbcs.toUnicodeFallback;
02327                         targetUniChar = myToUnicodeSBCS[(unsigned char) mySourceChar];
02328                         /* There are no fallbacks in ISO_8859_1, ISO_8859_7,JISX201 so we can
02329                          * safely ignore the codepaths below
02330                          */
02331                         /*
02332                         if(targetUniChar> 0xfffe){
02333                             if(UCNV_TO_U_USE_FALLBACK(args->converter) && 
02334                                 (myData->currentConverter->sharedData->staticData->hasToUnicodeFallback == TRUE)){
02335                             
02336                                 targetUniChar = myToUnicodeFallbackSBCS[(unsigned char) mySource[mySourceIndex-1]];
02337                             }
02338                         }
02339                         */
02340                     }
02341                     else{
02342                         goto SAVE_STATE;
02343                     }
02344                     break;
02345                 
02346                 case DBCS:
02347                     myToUnicodeDBCS = &myData->currentConverter->sharedData->table->dbcs.toUnicode;
02348                     myToUnicodeFallbackDBCS = &myData->currentConverter->sharedData->table->dbcs.toUnicodeFallback;
02349                 
02350                     if(args->converter->toUnicodeStatus == 0x00){
                        /* lead byte: remember it and wait for the trail byte */
02351                         args->converter->toUnicodeStatus = (UChar) mySourceChar;
02352                         continue;
02353                     }
02354                     else{
                        /* trail byte: keep both raw bytes for error reporting, then look up the pair */
02355                         tempBuf[0] = (char) args->converter->toUnicodeStatus ;
02356                         tempBuf[1] = (char) mySourceChar;
02357                         mySourceChar= (UChar)((args->converter->toUnicodeStatus << 8) | (mySourceChar & 0x00ff));
02358                         args->converter->toUnicodeStatus =0x00;
02359                     
02360                         targetUniChar = ucmp16_getu(myToUnicodeDBCS,mySourceChar);
02361                         /* There are no fallbacks in JISX208,JISX212, KSC5601,GB2312 so we can
02362                          * safely ignore the codepaths below
02363                          */
02364                         /*
02365                         if(targetUniChar> 0xfffe){
02366                             if(UCNV_TO_U_USE_FALLBACK(args->converter) && 
02367                                 (myData->currentConverter->sharedData->staticData->hasFromUnicodeFallback == TRUE)){
02368                             
02369                                 targetUniChar = (UChar) ucmp16_getu(myToUnicodeFallbackDBCS, mySourceChar);
02370                             }
02371                         } 
02372                         */
02373                     }
02374                 
02375                     break;
02376                     
02377                 case LATIN1:
                    /* Test the toUnicode-side pending byte, as the ASCII1/SBCS/DBCS cases do.
                     * fromUnicodeStatus is state of the other conversion direction and must
                     * not decide whether an input byte is legal here.
                     */
02378                     if(args->converter->toUnicodeStatus == 0x00 && mySourceChar < 0x100){
02379                         targetUniChar = (UChar) mySourceChar;
02380                     }
02381                     else{
02382                         goto SAVE_STATE;
02383                     }
02384                     break;
02385                 
02386                 case INVALID_STATE:
02387                     *err = U_ILLEGAL_ESCAPE_SEQUENCE;
02388                     return;
02389                 default:
02390                     /* For non-valid state MBCS and others */
02391                     break;
02392             }
02393             if(targetUniChar < 0xfffe){
                /* record where in the source this UChar started: two bytes back for
                 * converter types ordered after SBCS, one byte back otherwise
                 */
02394                 if(myConverterType[myData->toUnicodeCurrentState] > SBCS ){
02395 
02396                     args->offsets[myTarget - args->target]= mySource - args->source -2;
02397                 }
02398                 else{
02399                     args->offsets[myTarget - args->target]= mySource - args->source - 1;
02400                 }
02401                 *(myTarget++)=(UChar)targetUniChar;
02402                 targetUniChar=missingCharMarker;
02403             }
02404             else if(targetUniChar>=0xfffe){
02405 SAVE_STATE:
02406                 {
                    /* args->source/target are lent to the callback and restored afterwards */
02407                     const char *saveSource = args->source;
02408                     UChar *saveTarget = args->target;
02409 
02410                     int32_t *saveOffsets = args->offsets;
02411                     
02412                     UConverterCallbackReason reason;
02413                     int32_t currentOffset ;
02414                     int32_t My_i = myTarget - args->target;
02415                     if(myConverterType[myData->toUnicodeCurrentState] > SBCS){
02416 
02417                         currentOffset= mySource - args->source - 2;
02418                         args->converter->invalidCharBuffer[args->converter->invalidCharLength++] = tempBuf[0];
02419                         args->converter->invalidCharBuffer[args->converter->invalidCharLength++] = tempBuf[1];
02420                     }
02421                     else{
02422 
02423                         currentOffset= mySource - args->source -1;
02424                         args->converter->invalidCharBuffer[args->converter->invalidCharLength++] =(char) mySourceChar;
02425                     }
02426                     
02427                     
02428                     if(targetUniChar == 0xfffe){
02429                         reason = UCNV_UNASSIGNED;
02430                         *err = U_INVALID_CHAR_FOUND;
02431                     }
02432                     else{
02433                         reason = UCNV_ILLEGAL;
02434                         *err = U_ILLEGAL_CHAR_FOUND;
02435                     }
02436 
02437                    
                    /* hand the callback an offsets pointer aligned with the current target position */
02438                     args->offsets = args->offsets?args->offsets+(myTarget - args->target):0;
02439                     args->target =myTarget;
02440                     myTarget =saveTarget;
02441                     args->source = mySource;
02442                     ToU_CALLBACK_OFFSETS_LOGIC_MACRO( args->converter->toUContext,
02443                         args,
02444                         args->converter->invalidCharBuffer,
02445                         args->converter->invalidCharLength,
02446                         reason,
02447                         err);
02448                     args->converter->invalidCharLength=0;
                    /* continue writing after whatever the callback produced */
02449                     myTarget=args->target;
02450                     args->source  = saveSource;
02451                     args->target  = saveTarget;
02452                     /*args->offsets = saveOffsets;*/
02453                     if(U_FAILURE(*err))
02454                         break;
02455 
02456                 }
02457             }
02458         }
02459         else{
02460             *err =U_BUFFER_OVERFLOW_ERROR;
02461             break;
02462         }
02463     }
    /* flushing with a lead byte still pending: the input ended inside a character */
02464     if((args->flush==TRUE)
02465         && (mySource == mySourceLimit) 
02466         && ( args->converter->toUnicodeStatus !=0x00)){
02467         if(U_SUCCESS(*err)){
02468             *err = U_TRUNCATED_CHAR_FOUND;
02469             args->converter->toUnicodeStatus = 0x00;
02470         }
02471     }
02472     /* Reset the state of converter if we consumed 
02473      * the source and flush is true
02474      */
02475     if( (mySource == mySourceLimit) && args->flush){
02476         setInitialStateToUnicodeJPCN(args->converter,myData);
02477     }
02478 
02479     args->target = myTarget;
02480     args->source = mySource;
02481 }
02482 
02483 
02484 
02485 /***************************************************************
02486 *   Rules for ISO-2022-KR encoding
02487 *   i) The KSC5601 designator sequence should appear only once in a file, 
01488 *      at the beginning of a line before any KSC5601 characters. This usually
02489 *      means that it appears by itself on the first line of the file
02490 *  ii) There are only 2 shifting sequences SO to shift into double byte mode
02491 *      and SI to shift into single byte mode   
02492 */
/* Forward declaration; the definition is not in this part of the file.
 * Takes the toUnicode argument block and an error code and returns a pointer
 * into constant char data -- presumably a position in the source buffer for
 * the ISO-2022-KR converter; confirm against the definition.
 */
01493 const char* getEndOfBuffer_2022_KR(UConverterToUnicodeArgs* args, UErrorCode* err);
02494 
02495 
02496 U_CFUNC void UConverter_fromUnicode_ISO_2022_KR(UConverterFromUnicodeArgs* args, UErrorCode* err){
02497     
02498     const UChar *mySource = args->source;
02499     unsigned char *myTarget = (unsigned char *) args->target;
02500     int32_t mySourceIndex = 0;
02501     int32_t myTargetIndex = 0;
02502     int32_t targetLength = args->targetLimit - args->target;
02503     int32_t sourceLength = args->sourceLimit - args->source;
02504     int32_t length=0;
02505     uint32_t targetUniChar = 0x0000;
02506     UChar32 mySourceChar = 0x0000;
02507     UBool isTargetUCharDBCS = (UBool)args->converter->fromUnicodeStatus;
02508     UBool oldIsTargetUCharDBCS = isTargetUCharDBCS;
02509     UConverterDataISO2022 *myConverterData=(UConverterDataISO2022*)args->converter->extraInfo;
02510     UConverterCallbackReason reason;
02511     
02512     isTargetUCharDBCS   = (UBool) args->converter->fromUnicodeStatus;
02513     
02514     if(args->converter->fromUSurrogateLead!=0 && myTargetIndex < targetLength) {
02515         goto getTrail;
02516     }
02517     /*writing the char to the output stream */
02518     while (mySourceIndex < sourceLength){
02519         
02520         if (myTargetIndex < targetLength){
02521             
02522             mySourceChar = (UChar) args->source[mySourceIndex++];
02523             
02524             /*Handle surrogates */
02525             if(UTF_IS_SURROGATE(mySourceChar)) {
02526                 if(UTF_IS_SURROGATE_FIRST(mySourceChar)) {
02527                     args->converter->fromUSurrogateLead=(UChar)mySourceChar;
02528 getTrail:
02529                     /*look ahead to find the trail surrogate*/
02530                     if(mySourceIndex <  sourceLength) {
02531                         /* test the following code unit */
02532                         UChar trail=(UChar) args->source[mySourceIndex];
02533                         if(UTF_IS_SECOND_SURROGATE(trail)) {
02534                             ++mySourceIndex;
02535                             mySourceChar=UTF16_GET_PAIR_VALUE(mySourceChar, trail);
02536                             isTargetUCharDBCS=TRUE;
02537                             args->converter->fromUSurrogateLead=0x00;
02538                             /* convert this surrogate code point */
02539                             /* exit this condition tree */
02540                         } else {
02541                             /* this is an unmatched lead code unit (1st surrogate) */
02542                             /* callback(illegal) */
02543                             reason=UCNV_ILLEGAL;
02544                             *err=U_ILLEGAL_CHAR_FOUND;
02545                             goto CALLBACK;
02546                         }
02547                     } else {
02548                         /* no more input */
02549                         break;
02550                     }
02551                 } else {
02552                     /* this is an unmatched trail code unit (2nd surrogate) */
02553                     /* callback(illegal) */
02554                     reason=UCNV_ILLEGAL;
02555                     *err=U_ILLEGAL_CHAR_FOUND;
02556                     goto CALLBACK;
02557                 }
02558             }
02559             length= _MBCSFromUChar32(myConverterData->fromUnicodeConverter->sharedData,
02560                 mySourceChar,&targetUniChar,args->converter->useFallback);
02561            
02562             
02563             /* only DBCS or SBCS characters are expected*/
02564             if(length > 2 || length==0){
02565                 reason =UCNV_ILLEGAL;
02566                 *err =U_INVALID_CHAR_FOUND;
02567                 goto CALLBACK;
02568             }
02569             /* DB haracters with high bit set to 1 are expected */
02570             if(((targetUniChar & 0x8080) != 0x8080)&& length==2){
02571                 reason =UCNV_ILLEGAL;
02572                 *err =U_INVALID_CHAR_FOUND;
02573                 goto CALLBACK;
02574             }
02575             
02576             oldIsTargetUCharDBCS = isTargetUCharDBCS;
02577             isTargetUCharDBCS = (UBool)(targetUniChar>0x00FF);
02578             
02579             if (targetUniChar != missingCharMarker){
02580                 
02581                 if (oldIsTargetUCharDBCS != isTargetUCharDBCS || !myConverterData->isShiftAppended){
02582                     
02583                     if (isTargetUCharDBCS) 
02584                         args->target[myTargetIndex++] = UCNV_SO;
02585                     else 
02586                         args->target[myTargetIndex++] = UCNV_SI;
02587 
02588                     myConverterData->isShiftAppended=TRUE;
02589 
02590                     if ((!isTargetUCharDBCS)&&(myTargetIndex+1 >= targetLength)){
02591                         args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (char) targetUniChar;
02592                         *err = U_BUFFER_OVERFLOW_ERROR;
02593                         break;
02594                     }
02595                     else if (myTargetIndex+1 >= targetLength){
02596                         
02597                         args->converter->charErrorBuffer[0] = (char) ((targetUniChar >> 8) -0x80);
02598                         args->converter->charErrorBuffer[1] = (char)((targetUniChar & 0x00FF) -0x80);
02599                         args->converter->charErrorBufferLength = 2;
02600                         *err = U_BUFFER_OVERFLOW_ERROR;
02601                         break;
02602                     }
02603                     
02604                 }
02605                 
02606                 if (!isTargetUCharDBCS){
02607                     
02608                     args->target[myTargetIndex++] = (char) targetUniChar;
02609                 }
02610                 else{
02611                     args->target[myTargetIndex++] = (char) ((targetUniChar >> 8) - 0x80);
02612                     if (myTargetIndex < targetLength){
02613                         args->target[myTargetIndex++] = (char)((targetUniChar & 0x00FF) -0x80);
02614                     }
02615                     else{
02616                         args->converter->charErrorBuffer[0] = (char)((targetUniChar & 0x00FF) -0x80);
02617                         args->converter->charErrorBufferLength = 1;
02618                         *err = U_BUFFER_OVERFLOW_ERROR;
02619                         break;
02620                     }
02621                 }
02622             }
02623             else{
02624                 
02625 CALLBACK:
02626                 {
02627                     const UChar* saveSource = args->source;
02628                     char* saveTarget = args->target;
02629                     int32_t *saveOffsets = args->offsets;
02630                 
02631                     isTargetUCharDBCS = oldIsTargetUCharDBCS;
02632                     *err = U_INVALID_CHAR_FOUND;
02633                     args->converter->invalidUCharBuffer[0] = (UChar) mySourceChar;
02634                     args->converter->invalidUCharLength = 1;
02635                 
02636                     args->converter->fromUnicodeStatus = (int32_t)isTargetUCharDBCS;
02637                     args->target += myTargetIndex;
02638                     args->source += mySourceIndex;
02639                     FromU_CALLBACK_MACRO(args->converter->fromUContext,
02640                         args,
02641                         args->converter->invalidUCharBuffer,
02642                         1,
02643                         (UChar32) mySourceChar,
02644                         UCNV_UNASSIGNED,
02645                         err);
02646                     args->source = saveSource;
02647                     args->target = saveTarget;
02648                     args->offsets = saveOffsets;
02649                     args->converter->invalidUCharLength = 0;
02650                     isTargetUCharDBCS=(UBool)args->converter->fromUnicodeStatus;
02651                     myConverterData->isShiftAppended =FALSE;
02652                     args->converter->fromUSurrogateLead=0x00;
02653                     if (U_FAILURE (*err)) 
02654                         break;
02655                 
02656                 }
02657             }
02658             targetUniChar=missingCharMarker;
02659         }
02660         else{
02661             *err = U_BUFFER_OVERFLOW_ERROR;
02662             break;
02663         }
02664         
02665     }
02666     /*If at the end of conversion we are still carrying state information
02667      *flush is TRUE, we can deduce that the input stream is truncated
02668      */
02669     if (args->converter->fromUSurrogateLead !=0 && (mySourceIndex == sourceLength) && args->flush){
02670         if (U_SUCCESS(*err)){
02671             *err = U_TRUNCATED_CHAR_FOUND;
02672             args->converter->toUnicodeStatus = 0x00;
02673         }
02674     }
02675     /* Reset the state of converter if we consumed 
02676      * the source and flush is true
02677      */
02678     if( (mySourceIndex == sourceLength) && args->flush){
02679         setInitialStateFromUnicodeKR(args->converter,myConverterData);
02680     }
02681 
02682     args->target += myTargetIndex;
02683     args->source += mySourceIndex;
02684     args->converter->fromUnicodeStatus = (int32_t)isTargetUCharDBCS;
02685     
02686     return;
02687 }
02688 
02689 
02690 U_CFUNC void UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC(UConverterFromUnicodeArgs* args, UErrorCode* err){
02691     
02692     const UChar *mySource = args->source;
02693     unsigned char *myTarget = (unsigned char *) args->target;
02694     int32_t mySourceIndex = 0;
02695     int32_t myTargetIndex = 0;
02696     int32_t targetLength = args->targetLimit - args->target;
02697     int32_t sourceLength = args->sourceLimit - args->source;
02698     uint32_t targetUniChar = 0x0000;
02699     UChar32 mySourceChar = 0x0000;
02700     UBool isTargetUCharDBCS = (UBool)args->converter->fromUnicodeStatus;
02701     UBool oldIsTargetUCharDBCS = isTargetUCharDBCS;
02702     UConverterDataISO2022 *myConverterData=(UConverterDataISO2022*)args->converter->extraInfo;
02703     UConverterCallbackReason reason;
02704     int32_t length =0;
02705     
02706     isTargetUCharDBCS   = (UBool) args->converter->fromUnicodeStatus;
02707     if(args->converter->fromUSurrogateLead!=0 && myTargetIndex <targetLength) {
02708         goto getTrail;
02709     }
02710     /*writing the char to the output stream */
02711     while (mySourceIndex < sourceLength){
02712         
02713         if (myTargetIndex < targetLength){
02714             
02715             mySourceChar = (UChar) args->source[mySourceIndex++];
02716             
02717             /*Handle surrogates */
02718               if(UTF_IS_SURROGATE(mySourceChar)) {
02719                 if(UTF_IS_SURROGATE_FIRST(mySourceChar)) {
02720                     args->converter->fromUSurrogateLead=(UChar) mySourceChar;
02721 getTrail:
02722                     /*look ahead to find the trail surrogate*/
02723                     if(mySourceIndex <  sourceLength) {
02724                         /* test the following code unit */
02725                         UChar trail=(UChar) args->source[mySourceIndex];
02726                         if(UTF_IS_SECOND_SURROGATE(trail)) {
02727                             ++mySourceIndex;
02728                             mySourceChar=UTF16_GET_PAIR_VALUE(mySourceChar, trail);
02729                             isTargetUCharDBCS=TRUE;
02730                             args->converter->fromUSurrogateLead=0x00;
02731                             /* convert this surrogate code point */
02732                             /* exit this condition tree */
02733                         } else {
02734                             /* this is an unmatched lead code unit (1st surrogate) */
02735                             /* callback(illegal) */
02736                             reason=UCNV_ILLEGAL;
02737                             *err=U_ILLEGAL_CHAR_FOUND;
02738                             goto CALLBACK;
02739                         }
02740                     } else {
02741                         /* no more input */
02742                         break;
02743                     }
02744                 } else {
02745                     /* this is an unmatched trail code unit (2nd surrogate) */
02746                     /* callback(illegal) */
02747                     reason=UCNV_ILLEGAL;
02748                     *err=U_ILLEGAL_CHAR_FOUND;
02749                     goto CALLBACK;
02750                 }
02751             }
02752             
02753             length= _MBCSFromUChar32(myConverterData->fromUnicodeConverter->sharedData,
02754                 mySourceChar,&targetUniChar,args->converter->useFallback);
02755             
02756             /* only DBCS or SBCS characters are expected*/
02757             if(length > 2 || length==0){
02758                 reason =UCNV_ILLEGAL;
02759                 *err =U_INVALID_CHAR_FOUND;
02760                 goto CALLBACK;
02761             }
02762             /* DB haracters with high bit set to 1 are expected */
02763             if(((targetUniChar & 0x8080) != 0x8080)&& length==2){
02764                 reason =UCNV_ILLEGAL;
02765                 *err =U_INVALID_CHAR_FOUND;
02766                 goto CALLBACK;
02767             }
02768 
02769             oldIsTargetUCharDBCS = isTargetUCharDBCS;
02770             isTargetUCharDBCS = (UBool)(targetUniChar>0x00FF);
02771             if (targetUniChar != missingCharMarker){
02772                 
02773                 if (oldIsTargetUCharDBCS != isTargetUCharDBCS || !myConverterData->isShiftAppended){
02774                     
02775                     args->offsets[myTargetIndex] = mySourceIndex-1;
02776 
02777                     if (isTargetUCharDBCS) 
02778                         args->target[myTargetIndex++] = UCNV_SO;
02779                     else
02780                         args->target[myTargetIndex++] = UCNV_SI;
02781 
02782                     myConverterData->isShiftAppended=TRUE;
02783                     if ((!isTargetUCharDBCS)&&(myTargetIndex+1 >= targetLength)){
02784                         
02785                         args->converter->charErrorBuffer[0] = (char) targetUniChar;
02786                         args->converter->charErrorBufferLength = 1;
02787                         *err = U_BUFFER_OVERFLOW_ERROR;
02788                         break;
02789                     }
02790                     else if (myTargetIndex+1 >= targetLength){
02791                         
02792                         args->converter->charErrorBuffer[0] = (char) ((targetUniChar >> 8)-0x80);
02793                         args->converter->charErrorBuffer[1] = (char) ((targetUniChar & 0x00FF)-0x80);
02794                         args->converter->charErrorBufferLength = 2;
02795                         *err = U_BUFFER_OVERFLOW_ERROR;
02796                         break;
02797                     }
02798                 }
02799                 
02800                 if (!isTargetUCharDBCS){
02801                     
02802                     args->offsets[myTargetIndex] = mySourceIndex-1;
02803                     args->target[myTargetIndex++] = (char) targetUniChar;
02804                 }
02805                 else{
02806                     args->offsets[myTargetIndex] = mySourceIndex-1;
02807                     args->target[myTargetIndex++] = (char) ((targetUniChar >> 8)-0x80);
02808                     
02809                     if (myTargetIndex < targetLength){
02810                         args->offsets[myTargetIndex] = mySourceIndex-1;
02811                         args->target[myTargetIndex++] = (char)((targetUniChar & 0x00FF)-0x80);
02812                     }
02813                     else{
02814                         args->converter->charErrorBuffer[0] =(char) ((targetUniChar & 0x00FF)-0x80);
02815                         args->converter->charErrorBufferLength = 1;
02816                         *err = U_BUFFER_OVERFLOW_ERROR;
02817                         break;
02818                     }
02819                 }
02820             }
02821             else{
02822 CALLBACK:
02823                 {
02824                     int32_t currentOffset = args->offsets[myTargetIndex-1]+1;
02825                     char * saveTarget = args->target;
02826                     const UChar* saveSource = args->source;
02827                     int32_t *saveOffsets = args->offsets;
02828                     *err = U_INVALID_CHAR_FOUND;
02829                     args->converter->invalidUCharBuffer[0] = (UChar) mySourceChar;
02830                     args->converter->invalidUCharLength = 1;
02831                 
02832                     /* Breaks out of the loop since behaviour was set to stop */
02833                     args->converter->fromUnicodeStatus = (int32_t)isTargetUCharDBCS;
02834                     args->target += myTargetIndex;
02835                     args->source += mySourceIndex;
02836                     args->offsets = args->offsets?args->offsets+myTargetIndex:0;
02837                     FromU_CALLBACK_OFFSETS_LOGIC_MACRO(args->converter->fromUContext,
02838                         args,
02839                         args->converter->invalidUCharBuffer,
02840                         1,
02841                         (UChar32)mySourceChar,
02842                         UCNV_UNASSIGNED,
02843                         err);
02844                     isTargetUCharDBCS=(UBool)args->converter->fromUnicodeStatus;
02845                     myConverterData->isShiftAppended =FALSE;
02846                     args->source = saveSource;
02847                     args->target = saveTarget;
02848                     args->offsets = saveOffsets;
02849                     args->converter->invalidUCharLength = 0;
02850                     args->converter->fromUSurrogateLead=0x00;
02851                     if (U_FAILURE (*err))
02852                         break;
02853 
02854                 }
02855             }
02856             targetUniChar=missingCharMarker;
02857         }
02858         else{
02859             
02860             *err = U_BUFFER_OVERFLOW_ERROR;
02861             break;
02862         }
02863         
02864     }
02865     
02866     /*If at the end of conversion we are still carrying state information
02867     *flush is TRUE, we can deduce that the input stream is truncated
02868     */
02869     if (args->converter->fromUSurrogateLead !=0 && (mySourceIndex == sourceLength) && args->flush){
02870         if (U_SUCCESS(*err)){
02871             *err = U_TRUNCATED_CHAR_FOUND;
02872             args->converter->toUnicodeStatus = 0x00;
02873         }
02874     }
02875     /* Reset the state of converter if we consumed 
02876      * the source and flush is true
02877      */
02878     if( (mySourceIndex == sourceLength) && args->flush){
02879         setInitialStateFromUnicodeKR(args->converter,myConverterData);
02880     }
02881 
02882     args->target += myTargetIndex;
02883     args->source += mySourceIndex;
02884     
02885     
02886 }
02887 
02888 /************************ To Unicode ***************************************/
02889 
02890 U_CFUNC void UConverter_toUnicode_ISO_2022_KR(UConverterToUnicodeArgs *args,
02891                                               UErrorCode* err){
02892     char tempBuf[3];
02893     const char* pBuf;
02894     const char *mySource = ( char *) args->source;
02895     UChar *myTarget = args->target;
02896     char *tempLimit = &tempBuf[2]+1; 
02897     int32_t mySourceIndex = 0;
02898     int32_t myTargetIndex = 0;
02899     const char *mySourceLimit = args->sourceLimit;
02900     UChar32 targetUniChar = 0x0000;
02901     UChar mySourceChar = 0x0000;
02902     int plane = 0; /*dummy variable*/
02903     UConverterDataISO2022* myData=(UConverterDataISO2022*)(args->converter->extraInfo);
02904     
02905     
02906     /*Arguments Check*/
02907     if (U_FAILURE(*err)) 
02908         return;
02909     
02910     if ((args->converter == NULL) || (args->targetLimit < args->target) || (args->sourceLimit < args->source)){
02911         *err = U_ILLEGAL_ARGUMENT_ERROR;
02912         return;
02913     }
02914     
02915     while(mySource< args->sourceLimit){
02916         
02917         if(myTarget < args->targetLimit){
02918             
02919             mySourceChar= (unsigned char) *mySource++;
02920 
02921             switch(mySourceChar){
02922             
02923                 case UCNV_SI:
02924                     myData->currentType = SBCS;
02925                     continue;
02926                 case UCNV_SO:
02927                     myData->currentType = DBCS;
02928                     /*consume the source */
02929                     continue;
02930                        
02931                 default: 
02932                     if(myData->key==0){
02933                         break;
02934                     }
02935                 case ESC_2022:
02936                 {
02937                     /* Already doing some conversion and found escape Sequence*/
02938                     if(args->converter->mode == UCNV_SO){
02939                         *err = U_ILLEGAL_ESCAPE_SEQUENCE;
02940                         goto SAVE_STATE;
02941                     }
02942                     else{
02943                         mySource--;
02944                         changeState_2022(args->converter,&(mySource), 
02945                         args->sourceLimit, args->flush,ISO_2022,&plane, err);
02946 
02947                        
02948                     }
02949                     if(U_FAILURE(*err)){
02950                        *err = U_ILLEGAL_ESCAPE_SEQUENCE;
02951                         goto SAVE_STATE;
02952                     }
02953                     continue;
02954                 }
02955             }
02956              
02957             if(myData->currentType==DBCS){
02958                 if(args->converter->toUnicodeStatus == 0x00){
02959                     args->converter->toUnicodeStatus = (UChar) mySourceChar;
02960                     continue;
02961                 }
02962                 else{
02963                     tempBuf[0] = (char) (args->converter->toUnicodeStatus+0x80) ;
02964                     tempBuf[1] = (char) (mySourceChar+0x80);
02965                     mySourceChar= (UChar)(((args->converter->toUnicodeStatus << 8)+0x80) | ((mySourceChar & 0x00ff)+0x80));
02966                     args->converter->toUnicodeStatus =0x00;
02967                     pBuf = &tempBuf[0];
02968                     tempLimit = &tempBuf[2]+1;
02969                     targetUniChar = _MBCSSimpleGetNextUChar(myData->fromUnicodeConverter->sharedData,
02970                         &pBuf,tempLimit,args->converter->useFallback);
02971                 }
02972             }
02973             else{
02974                 if(args->converter->fromUnicodeStatus == 0x00){
02975                     tempBuf[0] = (char) mySourceChar;
02976                     pBuf = &tempBuf[0];
02977                     tempLimit = &tempBuf[1];
02978                     targetUniChar = _MBCSSimpleGetNextUChar(myData->fromUnicodeConverter->sharedData,
02979                         &pBuf,tempLimit,args->converter->useFallback);
02980                 }
02981                 else{
02982                     goto SAVE_STATE;
02983                 }
02984 
02985             }
02986             if(targetUniChar < 0xfffe){
02987                 *(myTarget++)=(UChar)targetUniChar;
02988             }
02989             else if(targetUniChar>=0xfffe){
02990 SAVE_STATE:
02991                 {
02992                     const char *saveSource = args->source;
02993                     UChar *saveTarget = args->target;
02994                     int32_t *saveOffsets = args->offsets;
02995                     UConverterCallbackReason reason;
02996                 
02997                     if(targetUniChar == 0xfffe){
02998                         reason = UCNV_UNASSIGNED;
02999                         *err = U_INVALID_CHAR_FOUND;
03000                     }
03001                     else{
03002                         reason = UCNV_ILLEGAL;
03003                         *err = U_ILLEGAL_CHAR_FOUND;
03004                     }
03005                     if(myData->currentType== DBCS){
03006                 
03007                         args->converter->invalidCharBuffer[args->converter->invalidCharLength++] = (char)(tempBuf[0]-0x80);
03008                         args->converter->invalidCharBuffer[args->converter->invalidCharLength++] = (char)(tempBuf[1]-0x80);    
03009                     }
03010                     else{
03011                         args->converter->invalidCharBuffer[args->converter->invalidCharLength++] = (char)mySourceChar;
03012                     }
03013 
03014                     args->target = myTarget;
03015                     args->source = mySource;
03016                     ToU_CALLBACK_MACRO( args->converter->toUContext,
03017                         args,
03018                         args->converter->invalidCharBuffer,
03019                         args->converter->invalidCharLength,
03020                         reason,
03021                         err);
03022                     args->source  = saveSource;
03023                     myTarget = args->target;
03024                     args->target  = saveTarget;
03025                     args->offsets = saveOffsets;
03026                     args->converter->invalidCharLength=0;
03027                     if(U_FAILURE(*err))
03028                         break;
03029 
03030                 }
03031             }
03032         }
03033         else{
03034             *err =U_BUFFER_OVERFLOW_ERROR;
03035             break;
03036         }
03037     }
03038     if((args->flush==TRUE)
03039         && (mySource == mySourceLimit) 
03040         && ( args->converter->toUnicodeStatus !=0x00)){
03041         if(U_SUCCESS(*err)){
03042             *err = U_TRUNCATED_CHAR_FOUND;
03043             args->converter->toUnicodeStatus = 0x00;
03044         }
03045     }
03046     /* Reset the state of converter if we consumed 
03047      * the source and flush is true
03048      */
03049     if( (mySource == mySourceLimit) && args->flush){
03050         setInitialStateToUnicodeKR(args->converter,myData);
03051     }
03052 
03053     args->target = myTarget;
03054     args->source = mySource;
03055 }
03056 
03057 U_CFUNC void UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,
03058                                                             UErrorCode* err){
03059     char tempBuf[3];
03060     const char* pBuf;
03061     const char *mySource = ( char *) args->source;
03062     UChar *myTarget = args->target;
03063     char *tempLimit = &tempBuf[2]+1; 
03064     int32_t mySourceIndex = 0;
03065     int32_t myTargetIndex = 0;
03066     const char *mySourceLimit = args->sourceLimit;
03067     UChar32 targetUniChar = 0x0000;
03068     UChar mySourceChar = 0x0000;
03069     UConverterDataISO2022* myData=(UConverterDataISO2022*)(args->converter->extraInfo);
03070     int plane =0; /*dummy variable */
03071     
03072     /*Arguments Check*/
03073     if (U_FAILURE(*err)) 
03074         return;
03075     
03076     if ((args->converter == NULL) || (args->targetLimit < args->target) || (args->sourceLimit < args->source)){
03077         *err = U_ILLEGAL_ARGUMENT_ERROR;
03078         return;
03079     }
03080     
03081     while(mySource< args->sourceLimit){
03082         
03083         if(myTarget < args->targetLimit){
03084             
03085             mySourceChar= (unsigned char) *mySource++;
03086             
03087             switch(mySourceChar){
03088             
03089                 case UCNV_SI:
03090                     myData->currentType = SBCS;
03091                     continue;
03092                 case UCNV_SO:
03093                     myData->currentType =DBCS;
03094                     /*consume the source */
03095                     continue;
03096                        
03097                 default:
03098                     /* If we are in the process of consuming an escape sequence 
03099                      * we fall through execute the the statements of next switch 
03100                      * tag else we break;
03101                      */
03102                     if(myData->key==0){
03103                         break;
03104                     }
03105                 case ESC_2022:
03106                 {
03107                     /* Already doing some conversion and found escape Sequence*/
03108                     if(args->converter->mode == UCNV_SO){
03109                         *err = U_ILLEGAL_ESCAPE_SEQUENCE;
03110                         goto SAVE_STATE;
03111                     }
03112                     else{
03113                         mySource--;
03114                         changeState_2022(args->converter,&(mySource), 
03115                         args->sourceLimit, args->flush,ISO_2022,&plane, err);
03116 
03117                        
03118                     }
03119                     if(U_FAILURE(*err)){
03120                        *err = U_ILLEGAL_ESCAPE_SEQUENCE;
03121                         goto SAVE_STATE;
03122                     }
03123                     continue;
03124                 }
03125             }
03126              
03127             if(myData->currentType==DBCS){
03128                 if(args->converter->toUnicodeStatus == 0x00){
03129                     args->converter->toUnicodeStatus = (UChar) mySourceChar;
03130                     continue;
03131                 }
03132                 else{
03133                     tempBuf[0] = (char) (args->converter->toUnicodeStatus+0x80) ;
03134                     tempBuf[1] = (char) (mySourceChar+0x80);
03135                     mySourceChar= (UChar)(((args->converter->toUnicodeStatus+0x80) << 8) | ((mySourceChar & 0x00ff)+0x80));
03136                     args->converter->toUnicodeStatus =0x00;
03137                     pBuf = &tempBuf[0];
03138                     tempLimit = &tempBuf[2]+1;
03139                     targetUniChar = _MBCSSimpleGetNextUChar(myData->fromUnicodeConverter->sharedData,
03140                         &pBuf,tempLimit,args->converter->useFallback);
03141                 }
03142             }
03143             else{
03144                 if(args->converter->fromUnicodeStatus == 0x00){
03145                     tempBuf[0] = (char) mySourceChar;
03146                     pBuf = &tempBuf[0];
03147                     tempLimit = &tempBuf[1];
03148                     targetUniChar = _MBCSSimpleGetNextUChar(myData->currentConverter->sharedData,
03149                         &pBuf,tempLimit,args->converter->useFallback);
03150                 }
03151                 else{
03152                     goto SAVE_STATE;
03153                 }
03154 
03155             }
03156             if(targetUniChar < 0xfffe){
03157                  if(myData->currentType==DBCS){
03158                     args->offsets[myTarget - args->target]= mySource - args->source - 2;
03159                 }
03160                 else{
03161                     args->offsets[myTarget - args->target]= mySource - args->source - 1;
03162                 }
03163                 *(myTarget++)=(UChar)targetUniChar;
03164             }
03165             else if(targetUniChar>=0xfffe){
03166 SAVE_STATE:
03167                 {
03168                     const char *saveSource = args->source;
03169                     UChar *saveTarget = args->target;
03170                     int32_t *saveOffsets = args->offsets;
03171                     
03172                     UConverterCallbackReason reason;
03173                     int32_t currentOffset ;
03174                     int32_t My_i = myTarget - args->target;
03175                     
03176                     if(targetUniChar == 0xfffe){
03177                         reason = UCNV_UNASSIGNED;
03178                         *err = U_INVALID_CHAR_FOUND;
03179                     }
03180                     else{
03181                         reason = UCNV_ILLEGAL;
03182                         *err = U_ILLEGAL_CHAR_FOUND;
03183                     }
03184                     if(myData->currentType== DBCS){
03185 
03186                         args->converter->invalidCharBuffer[args->converter->invalidCharLength++] = (char)(tempBuf[0]-0x80);
03187                         args->converter->invalidCharBuffer[args->converter->invalidCharLength++] = (char)(tempBuf[1]-0x80);
03188                         currentOffset= mySource - args->source -2;
03189                     
03190                     }
03191                     else{
03192                         args->converter->invalidCharBuffer[args->converter->invalidCharLength++] = (char)mySourceChar;
03193                         currentOffset= mySource - args->source -1;
03194                     }
03195                     args->offsets = args->offsets?args->offsets+(myTarget - args->target):0;
03196                     args->target = myTarget;
03197                     args->source = mySource;
03198                     myTarget = saveTarget;
03199                     ToU_CALLBACK_OFFSETS_LOGIC_MACRO( args->converter->toUContext,
03200                         args,
03201                         args->converter->invalidCharBuffer,
03202                         args->converter->invalidCharLength,
03203                         reason,
03204                         err);
03205                     args->converter->invalidCharLength=0;
03206                     args->source  = saveSource;
03207                     myTarget = args->target;
03208                     args->target  = saveTarget;
03209                     args->offsets = saveOffsets;
03210                     if(U_FAILURE(*err))
03211                         break;
03212 
03213                 }
03214             }
03215         }
03216         else{
03217             *err =U_BUFFER_OVERFLOW_ERROR;
03218             break;
03219         }
03220     }
03221     if((args->flush==TRUE)
03222         && (mySource == mySourceLimit) 
03223         && ( args->converter->toUnicodeStatus !=0x00)){
03224         if(U_SUCCESS(*err)){
03225             *err = U_TRUNCATED_CHAR_FOUND;
03226             args->converter->toUnicodeStatus = 0x00;
03227         }
03228     }
03229     /* Reset the state of converter if we consumed 
03230      * the source and flush is true
03231      */
03232     if( (mySource == mySourceLimit) && args->flush){
03233         setInitialStateToUnicodeKR(args->converter,myData);
03234     }
03235 
03236     args->target = myTarget;
03237     args->source = mySource;
03238 }
03239 
03240 /*
03241 * Simple, interim implementation of getNextUChar() for ISO-2022-KR.
03242 * It is layered on UConverter_toUnicode_ISO_2022_KR() so that one single
03243 * implementation of the ToUnicode conversion can be tested before it is
03244 * copied to multiple versions that are then optimized for their needs
03245 * (with vs. without offsets and getNextUChar).
      *
      * The source is handed to the ToUnicode function one byte at a time, with
      * a small local UChar buffer as the target, until some output appears.
      *
      * pArgs      - conversion arguments; source is advanced by the ToUnicode
      *              call.  target, targetLimit, sourceLimit and flush are
      *              overwritten here and are NOT restored before returning
      *              (target/targetLimit are left pointing at a local buffer).
      * pErrorCode - receives the status:
      *                any failure other than U_BUFFER_OVERFLOW_ERROR is passed
      *                through and 0xffff is returned;
      *                U_BUFFER_OVERFLOW_ERROR is cleared to U_ZERO_ERROR when
      *                output was produced;
      *                U_INDEX_OUTOFBOUNDS_ERROR is set when the input is
      *                exhausted without producing any output.
      *
      * Returns the value computed by ucnv_getUChar32KeepOverflow() from the
      * buffered output, or 0xffff on error / no output.
03246 */
03247 
03248 U_CFUNC UChar32
03249 UConverter_getNextUChar_ISO_2022_KR(UConverterToUnicodeArgs *pArgs,
03250                                     UErrorCode *pErrorCode) {
03251     UChar buffer[UTF_MAX_CHAR_LENGTH];
          /* remember the caller's limit: sourceLimit is narrowed on every iteration */
03252     const char *realLimit=pArgs->sourceLimit;
03253     
          /* redirect the output into the local buffer */
03254     pArgs->target=buffer;
03255     pArgs->targetLimit=buffer+UTF_MAX_CHAR_LENGTH;
03256     
03257     while(pArgs->source<realLimit) {
03258         /* feed in one byte at a time to make sure to get only one character out */
03259         pArgs->sourceLimit=pArgs->source+1;
              /* flush only when this byte is the last one of the real input */
03260         pArgs->flush= (UBool)(pArgs->sourceLimit==realLimit);
03261         UConverter_toUnicode_ISO_2022_KR(pArgs, pErrorCode);
03262         if(U_FAILURE(*pErrorCode) && *pErrorCode!=U_BUFFER_OVERFLOW_ERROR) {
03263             return 0xffff;
03264         } else if(pArgs->target!=buffer) {
                  /* something was written: an overflow of the small buffer is not an error for the caller */
03265             if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
03266                 *pErrorCode=U_ZERO_ERROR;
03267             }
                  /* presumably assembles one code point and keeps surplus units as converter overflow - confirm in ucnv_cnv */
03268             return ucnv_getUChar32KeepOverflow(pArgs->converter, buffer, pArgs->target-buffer);
03269         }
03270     }
03271     
03272     /* no output because of empty input or only state changes and skipping callbacks */
03273     *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
03274     return 0xffff;
03275 }
03276 
03277 /*************************** END ISO2022-KR *********************************/
03278 
03279 
03280 
03281 /*************************** ISO-2022-CN *********************************
03282 * 
03283 * Rules for ISO-2022-CN Encoding:
03284 * i)   The designator sequence must appear once on a line before any instance
03285 *      of the character set it designates.
03286 * ii)  If two lines contain characters from the same character set, both lines
03287 *      must include the designator sequence.
03288 * iii) Once the designator sequence is known, a shifting sequence has to be found
03289 *      to invoke the shifting
03290 * iv)  All lines start in ASCII and end in ASCII.  
03291 * v)   Four shifting sequences are employed for this purpose:
03292 *
03293 *      Sequence    ASCII Eq    Charsets
03294 *      ----------  -------    ---------     
03295 *      SS2          <ESC>N      CNS-11643-1992 Plane 2
03296 *      SS3          <ESC>O      CNS-11643-1992 Planes 3-7
03297 *      SI           <SI>        ASCII
03298 *      SO           <SO>        CNS-11643-1992 Plane 1, GB2312,ISO-IR-165
03299 *
03300 * vi)   
03301 *      SOdesignator  : ESC "$" ")" finalchar_for_SO
03302 *      SS2designator : ESC "$" "*" finalchar_for_SS2
03303 *      SS3designator : ESC "$" "+" finalchar_for_SS3
03304 *
03305 *      ESC $ ) A       Indicates the bytes following SO are Chinese
03306 *       characters as defined in GB 2312-80, until
03307 *       another SOdesignation appears
03308 *               
03309 *
03310 *      ESC $ ) E       Indicates the bytes following SO are as defined
03311 *       in ISO-IR-165 (for details, see section 2.1),
03312 *       until another SOdesignation appears
03313 *
03314 *      ESC $ ) G       Indicates the bytes following SO are as defined
03315 *       in CNS 11643-plane-1, until another
03316 *       SOdesignation appears
03317 *
03318 *      ESC $ * H       Indicates the two bytes immediately following
03319 *       SS2 is a Chinese character as defined in CNS
03320 *       11643-plane-2, until another SS2designation
03321 *       appears
03322 *       (Meaning <ESC>N must precede every 2 byte 
03323 *        sequence.)
03324 *
03325 *      ESC $ + I       Indicates the immediate two bytes following SS3
03326 *       is a Chinese character as defined in CNS
03327 *       11643-plane-3, until another SS3designation
03328 *       appears
03329 *       (Meaning <ESC>O must precede every 2 byte 
03330 *        sequence.)   
03331 *
03332 *      ESC $ + J       Indicates the immediate two bytes following SS3
03333 *       is a Chinese character as defined in CNS
03334 *       11643-plane-4, until another SS3designation
03335 *       appears
03336 *       (In English: <ESC>O must precede every 2 byte 
03337 *        sequence.)   
03338 *
03339 *      ESC $ + K       Indicates the immediate two bytes following SS3
03340 *       is a Chinese character as defined in CNS
03341 *       11643-plane-5, until another SS3designation
03342 *       appears
03343 *
03344 *      ESC $ + L       Indicates the immediate two bytes following SS3
03345 *       is a Chinese character as defined in CNS
03346 *       11643-plane-6, until another SS3designation
03347 *       appears
03348 *
03349 *      ESC $ + M       Indicates the immediate two bytes following SS3
03350 *       is a Chinese character as defined in CNS
03351 *       11643-plane-7, until another SS3designation
03352 *       appears
03353 *
03354 *       As in ISO-2022-CN, each line starts in ASCII, and ends in ASCII, and
03355 *       has its own designation information before any Chinese characters
03356 *       appear
03357 *
03358 */
03359 
03360 
03361 /********************** ISO2022-CN Data **************************/
03362 static const char* escSeqCharsCN[10] ={
              /* Byte sequences that announce a character set in ISO-2022-CN output.
               * UConverter_fromUnicode_ISO_2022_CN indexes this table with the
               * StateEnumCN value for ASCII_1, GB2312_1 and ISO_IR_165, and with
               * CNS_11643 + plane - 1 for the CNS planes.
               * Entry 0 is the single SI byte (0x0F), not an ESC sequence.
               * NOTE(review): a CNS plane value of 8 would index one past the end
               * of this 10-entry table - confirm such a plane cannot be produced.
               */
03363         "\x0F",             /* ASCII */
03364         "\x1B\x24\x29\x41", /* GB 2312-80 */
03365         "\x1B\x24\x29\x45", /* ISO-IR-165 */
03366         "\x1B\x24\x29\x47", /* CNS 11643-1992 Plane 1 */
03367         "\x1B\x24\x2A\x48", /* CNS 11643-1992 Plane 2 */
03368         "\x1B\x24\x2B\x49", /* CNS 11643-1992 Plane 3 */
03369         "\x1B\x24\x2B\x4A", /* CNS 11643-1992 Plane 4 */
03370         "\x1B\x24\x2B\x4B", /* CNS 11643-1992 Plane 5 */
03371         "\x1B\x24\x2B\x4C", /* CNS 11643-1992 Plane 6 */
03372         "\x1B\x24\x2B\x4D"  /* CNS 11643-1992 Plane 7 */
03373 };
03374 
03375 static const char* shiftSeqCharsCN[10] ={
              /* Shift sequences written in front of converted characters.
               * UConverter_fromUnicode_ISO_2022_CN indexes this table with the
               * StateEnumCN value for GB2312_1 and ISO_IR_165 (SO, 0x0E) and with
               * CNS_11643 + plane for the CNS planes.
               * NOTE(review): that CNS index is one higher than the one used for
               * escSeqCharsCN (CNS_11643 + plane - 1), so plane 1 selects UCNV_SS2
               * here although its designator is an SO designator, and plane 2
               * selects UCNV_SS3.  Plane values of 7 or more would also index past
               * the end of this 10-entry table.  Confirm the intended layout.
               */
03376         "",                       /* index 0: ASCII_1, nothing to shift */
03377         (const char*) "\x0E",     /* index 1: SO */
03378         (const char*) "\x0E",     /* index 2: SO */
03379         (const char*) "\x0E",     /* index 3: SO */
03380         (const char*) UCNV_SS2,   /* index 4 */
03381         (const char*) UCNV_SS3,   /* index 5 */
03382         (const char*) UCNV_SS3,   /* index 6 */
03383         (const char*) UCNV_SS3,   /* index 7 */
03384         (const char*) UCNV_SS3,   /* index 8 */
03385         (const char*) UCNV_SS3    /* index 9 */
03386 };
03387 
03388 typedef enum  {
              /* Conversion states of the ISO-2022-CN fromUnicode code.  The values
               * are used as indices into myConverterTypeCN, into
               * myConverterData->myConverterArray and (directly or plus a plane
               * offset) into escSeqCharsCN / shiftSeqCharsCN, so they must stay
               * in step with those tables.
               */
03389         ASCII_1=0,
03390         GB2312_1=1,
03391         ISO_IR_165=2,
03392         CNS_11643=3
03393 } StateEnumCN;
03394 
03395 static Cnv2022Type myConverterTypeCN[4]={
              /* Lookup strategy for each StateEnumCN value; the fromUnicode code
               * switches on myConverterTypeCN[currentState].
               */
03396         ASCII1,     /* ASCII_1 */
03397         DBCS,       /* GB2312_1 */
03398         DBCS,       /* ISO_IR_165 */
03399         MBCS        /* CNS_11643 */
03400 };
03401 
03402 /*
      * UConverter_fromUnicode_ISO_2022_CN
      *
      * Converts the UTF-16 code units in [args->source, args->sourceLimit) to
      * ISO-2022-CN bytes written at args->target.
      *
      * For every source code point the function first tries the sub-converter
      * selected by myConverterData->fromUnicodeCurrentState.  On a miss it
      * steps cyclically through the StateEnumCN states (ASCII_1, GB2312_1,
      * ISO_IR_165, CNS_11643) until a state maps the character or the search
      * is back at the state it started from.  On a hit it appends, through the
      * concat* helpers, the designator from escSeqCharsCN (unless one is still
      * in effect), a shift sequence from shiftSeqCharsCN, and the converted
      * value.  CR and LF are passed to concatString unchanged (preceded by SI
      * when a shift is active) and clear the "escape appended" flag, so the
      * next converted character gets a fresh designator.
      *
      * args - source/target pointers and limits, flush flag and converter.  On
      *        the normal exit path source and target are advanced by
      *        mySourceIndex / myTargetIndex.
      * err  - receives:
      *          U_ILLEGAL_ARGUMENT_ERROR  NULL converter or a limit below its start
      *          U_BUFFER_OVERFLOW_ERROR   target full before the source was consumed
      *          U_ILLEGAL_CHAR_FOUND      unpaired surrogate (set before the callback)
      *          U_INVALID_CHAR_FOUND      no state maps the character, or the MBCS
      *                                    result is longer than 3 bytes (set before
      *                                    the callback)
      *          U_TRUNCATED_CHAR_FOUND    flush requested while a lead surrogate
      *                                    is still pending
      *
      * TEST_ERROR_CONDITION_CN is defined elsewhere in this file; presumably,
      * like TEST_ERROR_CONDITION near the top of the file, it saves the state
      * and leaves the function on U_BUFFER_OVERFLOW_ERROR - confirm.
      */
03403 U_CFUNC void UConverter_fromUnicode_ISO_2022_CN(UConverterFromUnicodeArgs* args, UErrorCode* err){
03404     
03405     UChar* mySource =(UChar*)args->source;
          /* NOTE(review): args->converter is dereferenced here, before the NULL check in the arguments check below */
03406     UConverterDataISO2022 *myConverterData=(UConverterDataISO2022*)args->converter->extraInfo;
03407     UConverterCallbackReason reason;
03408     UBool isEscapeAppended = FALSE;
03409     StateEnumCN initIterState;
03410     unsigned char *myTarget = (unsigned char *) args->target; 
03411     const UChar *saveSource;
03412     uint32_t targetValue=0;
03413     char *saveTarget;
03414     int32_t myTargetLength = args->targetLimit - args->target;
03415     int32_t mySourceLength = args->sourceLimit - args->source;
03416     int32_t mySourceIndex = 0;
03417     int32_t myTargetIndex = 0;
03418     int32_t length  =0;
          /* CNS plane of the most recent 3-byte MBCS result; it is not reset between characters */
03419     int plane = 0;
03420     CompactShortArray *myFromUnicodeDBCS = NULL;
          /* only assigned below; its sole use is inside commented-out code */
03421     CompactShortArray *myFromUnicodeDBCSFallback = NULL;
03422 
03423     UChar32 targetUniChar = missingCharMarker;
03424     
          /* NOTE(review): ASCII is not a StateEnumCN enumerator (that one is ASCII_1);
           * presumably it comes from another enum in this file and also equals 0 - confirm */
03425     StateEnumCN currentState=ASCII;
03426     
03427     UChar32 mySourceChar = 0x0000;
          /* updated on every miss but never read to make a decision */
03428     int iterCount = 0;
03429     const char *escSeq = NULL;
03430     UBool isShiftAppended = FALSE;
03431 
          /* pick up the state left behind by the previous call */
03432     isEscapeAppended =(UBool) myConverterData->isEscapeAppended;
03433     isShiftAppended =(UBool) myConverterData->isShiftAppended;
03434     initIterState = (StateEnumCN)myConverterData->fromUnicodeCurrentState;
03435     /* arguments check*/
03436     if ((args->converter == NULL) || (args->targetLimit < args->target) || (args->sourceLimit < args->source)){
03437         *err = U_ILLEGAL_ARGUMENT_ERROR;
03438         return;
03439     }
          /* A lead surrogate is pending from an earlier call: resume at the trail lookup.
           * NOTE(review): mySourceChar is still 0x0000 on this path; the saved
           * fromUSurrogateLead is not copied into it before UTF16_GET_PAIR_VALUE
           * is evaluated at getTrail - confirm. */
03440     if(args->converter->fromUSurrogateLead!=0 && myTargetIndex < myTargetLength) {
03441         goto getTrail;
03442     }
03443     while(mySourceIndex <  mySourceLength){
03444         currentState =(StateEnumCN) myConverterData->fromUnicodeCurrentState;
              /* use myConverterArray[0] if no converter has been selected yet, otherwise the one for the current state */
03445         myConverterData->fromUnicodeConverter = (myConverterData->fromUnicodeConverter == NULL) ?
03446             myConverterData->myConverterArray[0] :
03447         myConverterData->myConverterArray[(int)myConverterData->fromUnicodeCurrentState];
03448         
03449         if(myTargetIndex < myTargetLength){
03450             
03451             mySourceChar = (UChar) args->source[mySourceIndex++];
03452             
03453             /* I am handling surrogates in the beginning itself so that I do not have to go through 4
03454              * iterations on codepages that we support. 
03455              */
03456             if(UTF_IS_SURROGATE(mySourceChar)) {
03457                 if(UTF_IS_SURROGATE_FIRST(mySourceChar)) {
03458                     args->converter->fromUSurrogateLead=(UChar)mySourceChar;
03459 getTrail:
03460                     /*look ahead to find the trail surrogate*/
03461                     if(mySourceIndex <  mySourceLength) {
03462                         /* test the following code unit */
03463                         UChar trail=(UChar) args->source[mySourceIndex];
03464                         if(UTF_IS_SECOND_SURROGATE(trail)) {
03465                             ++mySourceIndex;
03466                             mySourceChar=UTF16_GET_PAIR_VALUE(mySourceChar, trail);
03467                             args->converter->fromUSurrogateLead=0x00;
03468                             /* convert this surrogate code point */
03469                             /* exit this condition tree */
03470                         } else {
03471                             /* this is an unmatched lead code unit (1st surrogate) */
03472                             /* callback(illegal) */
03473                             reason=UCNV_ILLEGAL;
03474                             *err=U_ILLEGAL_CHAR_FOUND;
03475                             goto CALLBACK;
03476                         }
03477                     } else {
03478                         /* no more input */
03479                         break;
03480                     }
03481                 } else {
03482                     /* this is an unmatched trail code unit (2nd surrogate) */
03483                     /* callback(illegal) */
03484                     reason=UCNV_ILLEGAL;
03485                     *err=U_ILLEGAL_CHAR_FOUND;
03486                     goto CALLBACK;
03487                 }
03488             }
03489             /* if the source character is CR or LF then append the ASCII escape sequence*/
03490             if(mySourceChar== 0x000A || mySourceChar== 0x000D){
03491                 
                      /* shift back in (SI) first if a non-ASCII state has an active shift */
03492                 if((int)currentState > 0 && isShiftAppended){
03493                     concatChar(args, &myTargetIndex, &myTargetLength, UCNV_SI,err,&mySourceIndex);
03494                     isShiftAppended=myConverterData->isShiftAppended=FALSE;
03495                     TEST_ERROR_CONDITION_CN(args,myTargetIndex, mySourceIndex,myConverterData, err);
03496                 }
                      /* force a new designator for the first converted character of the next line */
03497                 myConverterData->isEscapeAppended=isEscapeAppended=FALSE;
03498                 targetUniChar = mySourceChar;
03499                 concatString(args, &myTargetIndex, &myTargetLength,&targetUniChar,err,&mySourceIndex);
03500                 TEST_ERROR_CONDITION_CN(args,myTargetIndex, mySourceIndex,myConverterData, err);
03501                 
                      /* NOTE(review): this continue skips the reset of targetUniChar to
                       * missingCharMarker at the bottom of the loop body; a following
                       * character for which the switch below assigns nothing would see
                       * the CR/LF value - confirm that cannot happen */
03502                 continue;
03503             }
03504             else{
                      /* try the current state, then cycle through the other states until one maps the character */
03505                 do{
03506                     switch (myConverterTypeCN[currentState]){
03507                     
03508                         case DBCS:
                                  /* the 16-bit table lookup is only attempted for code points below 0xffff */
03509                             if(mySourceChar<0xffff){
03510                                 myFromUnicodeDBCS = &myConverterData->fromUnicodeConverter->sharedData->table->dbcs.fromUnicode;
03511                                 myFromUnicodeDBCSFallback = &myConverterData->fromUnicodeConverter->sharedData->table->dbcs.fromUnicodeFallback;
03512                                 targetUniChar = (UChar) ucmp16_getu (myFromUnicodeDBCS, mySourceChar);
03513                                
03514                                 /* There are no fallbacks in ISO-IR-165 or GB_2312_1 so we can
03515                                  * safely ignore the codepaths below
03516                                  */
03517                                 /*
03518                                 if ((targetUniChar==missingCharMarker) && UCNV_FROM_U_USE_FALLBACK(args->converter, mySourceChar) &&
03519                                     (myConverterData->fromUnicodeConverter->sharedData->staticData->hasFromUnicodeFallback == TRUE)){
03520                                     targetUniChar = (UChar) ucmp16_getu (myFromUnicodeDBCSFallback, mySourceChar);
03521                                 } */
03522                             }
                                  /* version 0 never produces ISO-IR-165: treat any result as a miss */
03523                             if(( myConverterData->version) == 0 && currentState==ISO_IR_165){
03524                                 targetUniChar=missingCharMarker;
03525                             }
03526                             break;
03527                     
03528                         case MBCS:
03529                     
03530                             length= _MBCSFromUChar32(myConverterData->fromUnicodeConverter->sharedData,
03531                                 mySourceChar,&targetValue,args->converter->useFallback);
03532                     
03533                             targetUniChar = (UChar32) targetValue;
03534                     
                                  /* length 0: no mapping; length 3: the top byte (0x81..0x88) carries
                                   * the plane number and is stripped from the value; longer results
                                   * are reported through the callback */
03535                             if(length==0){
03536                                 targetUniChar = missingCharMarker;
03537                             } 
03538                             else if(length==3){
03539                                 uint8_t planeVal = (uint8_t) ((targetValue)>>16);
03540                                 if(planeVal >0x80 && planeVal<0x89){
03541                                     plane = (int)(planeVal - 0x80);
03542                                     targetUniChar -= (planeVal<<16);
03543                                 }else 
03544                                     plane =-1;
03545                             }
03546                             else if(length >3){
03547                                 reason =UCNV_ILLEGAL;
03548                                 *err =U_INVALID_CHAR_FOUND;
03549                                 goto CALLBACK;
03550                             }
                                  /* version 0 only produces planes up to 2: higher planes count as a miss */
03551                             if(myConverterData->version == 0 && plane >2){
03552                                     targetUniChar = missingCharMarker;
03553                             }
03554                             break;
03555 
03556                         case ASCII1:
                                  /* code points below 0x7f pass through unchanged; 0x7f and above are a miss here */
03557                             if(mySourceChar < 0x7f){
03558                                 targetUniChar = mySourceChar;
03559                             }
03560                             else 
03561                                 targetUniChar = missingCharMarker;
03562                             break;
03563 
03564                         case LATIN1:
03565                             /*not expected*/
03566                               break;
03567                     
03568                         default:
03569                             /*not expected */ 
03570                             break;
03571                     }
03572                     if(targetUniChar==missingCharMarker){
03573                         iterCount = (iterCount<3)? iterCount+1 : 0;
                              /* advance to the next state, wrapping from 3 back to 0; the cast binds
                               * to the comparison only, which does not change the selected value */
03574                         myConverterData->fromUnicodeCurrentState=currentState=(StateEnumCN)(currentState<3)? currentState+1:0;
03575                         currentState =(StateEnumCN) myConverterData->fromUnicodeCurrentState;
03576                         myConverterData->fromUnicodeConverter = (myConverterData->fromUnicodeConverter == NULL) ?
03577                                                         myConverterData->myConverterArray[0] :
03578                                                     myConverterData->myConverterArray[(int)myConverterData->fromUnicodeCurrentState];
03579                         targetUniChar =missingCharMarker;
                              /* the character set changes, so a new designator will be needed */
03580                         isEscapeAppended = FALSE; 
03581                         /* save the state */
03582                         myConverterData->isEscapeAppended = isEscapeAppended;
03583                         myConverterData->isShiftAppended =isShiftAppended;
03584                         myConverterData->sourceIndex = mySourceIndex;
03585                         myConverterData->targetIndex = myTargetIndex;
03586                     }
                      /* stop on a hit, or once the search is back at the state it started from */
03587                 }while(targetUniChar==missingCharMarker && initIterState != currentState);
03588             }
03589             
03590             if(targetUniChar!= missingCharMarker){
03591                 
03592                 /* set the iteration state and iteration count  */
03593                 initIterState = currentState;
03594                 iterCount =0;
                      /* a change of plane needs a new designator even within the same state */
03595                 if(myConverterData->plane != plane){
03596                     isEscapeAppended=myConverterData->isEscapeAppended=FALSE;
03597                     myConverterData->plane = plane;
03598                 }
03599                 /* Append the escape sequence */
03600                 if(!isEscapeAppended){
                          /* CNS designators follow the three non-CNS entries, one per plane */
03601                     escSeq = (currentState==CNS_11643) ? escSeqCharsCN[(int)currentState+plane-1]:escSeqCharsCN[(int)currentState];
03602                     concatEscape(args, &myTargetIndex, &myTargetLength, 
03603                         escSeq,err,strlen(escSeq),&mySourceIndex);
03604                     isEscapeAppended=myConverterData->isEscapeAppended=TRUE;
03605                     
03606                     
03607                     
03608                 }
03609                 /* Append Shift Sequences */
03610                 if(currentState!=ASCII){
03611                     
03612                     if(currentState!=CNS_11643 ){
                              /* GB2312 / ISO-IR-165: the shift is written once and stays in effect */
03613                         if(!isShiftAppended){
03614                             concatEscape(args,&myTargetIndex,&myTargetLength,
03615                                 shiftSeqCharsCN[currentState],err,
03616                                 strlen(shiftSeqCharsCN[currentState]),&mySourceIndex);
03617                             myConverterData->isShiftAppended =isShiftAppended=TRUE;
03618                         }
03619                        
03620                     }
03621                     else{
                              /* CNS: a shift sequence is written for every character.
                               * NOTE(review): the index is currentState+plane, one higher than the
                               * escSeqCharsCN index above, and can run past the 10-entry table for
                               * plane values of 7 or more - confirm */
03622                         concatEscape(args,&myTargetIndex,&myTargetLength,shiftSeqCharsCN[currentState+plane],
03623                             err,strlen(shiftSeqCharsCN[currentState+plane]),&mySourceIndex);
03624                         
03625                         myConverterData->isShiftAppended =isShiftAppended=FALSE;
03626                     
03627                     }
03628                     
03629                 }
03630                 
03631                 concatString(args, &myTargetIndex, &myTargetLength,
03632                     &targetUniChar,err, &mySourceIndex);
03633                 TEST_ERROR_CONDITION_CN(args,myTargetIndex, mySourceIndex,myConverterData, err);
03634                 
03635             }/* end if(targetUniChar!=missingCharMarker) */
03636             else{
03637 
03638                 /* if we cannot find the character after checking all codepages 
03639                  * then this is an error
03640                  */
03641                     reason = UCNV_UNASSIGNED;
03642                     *err = U_INVALID_CHAR_FOUND;
                          /* only one UChar is stored: a supplementary code point is truncated by the cast */
03643                     args->converter->invalidUCharBuffer[0]=(UChar)mySourceChar;
03644                     args->converter->invalidUCharLength++;
03645                     
                          /* NOTE(review): the goto paths that enter here (unpaired surrogates,
                           * MBCS result longer than 3 bytes) have not stored anything in
                           * invalidUCharBuffer or incremented invalidUCharLength - confirm
                           * the callback macro copes with that */
03646 CALLBACK:
03647                     saveSource = args->source;
03648                     saveTarget = args->target;
03649      
                          /* let the callback see the current read/write positions */
03650                     args->target = (char*)myTarget + myTargetIndex;
03651                     args->source = mySource + mySourceIndex;
03652                     myConverterData->isShiftAppended =isShiftAppended;
03653 
03654                     FromU_CALLBACK_MACRO(args->converter->fromUContext,
03655                         args,
03656                         args->converter->invalidUCharBuffer,
03657                         args->converter->invalidUCharLength,
03658                         (UChar32) (args->converter->invalidUCharLength == 2 ? 
03659                         UTF16_GET_PAIR_VALUE(args->converter->invalidUCharBuffer[0], 
03660                         args->converter->invalidUCharBuffer[1]) 
03661                         : args->converter->invalidUCharBuffer[0]),
03662                         reason,
03663                         err);
                          /* restore the caller's pointers and clear the per-character error state */
03664                     args->converter->invalidUCharLength=0;
03665                     myConverterData->isEscapeAppended = isEscapeAppended=FALSE;
03666                     args->source=saveSource;
03667                     args->target=saveTarget;
03668                     args->converter->fromUSurrogateLead=0x00;
03669                     initIterState = (StateEnumCN) myConverterData->fromUnicodeCurrentState;
03670 
03671                     if (U_FAILURE (*err)){
03672                         break;
03673                     }
03674                   
03675             }
03676             targetUniChar =missingCharMarker;
03677         } /* end if(myTargetIndex<myTargetLength) */
03678         else{
03679             *err =U_BUFFER_OVERFLOW_ERROR;
03680             break;
03681         }
03682         
03683     }/* end while(mySourceIndex<mySourceLength) */
03684     
03685     /* If at the end of conversion we are still carrying state information
03686      * and flush is TRUE, we can deduce that the input stream is truncated
03687      */
03688     if (args->converter->fromUSurrogateLead !=0 && (mySourceIndex == mySourceLength) && args->flush){
03689         if (U_SUCCESS(*err)){
03690             *err = U_TRUNCATED_CHAR_FOUND;
                  /* NOTE(review): this clears toUnicodeStatus in a fromUnicode function,
                   * while the pending fromUSurrogateLead is left set - confirm the intended field */
03691             args->converter->toUnicodeStatus = 0x00;
03692         }
03693     }
03694     /* Reset the state of converter if we consumed 
03695      * the source and flush is true
03696      */
03697     if( (mySourceIndex == mySourceLength) && args->flush){
03698         setInitialStateFromUnicodeJPCN(myConverterData);
03699     }
03700 
03701     /*save the state and return */
03702     args->target += myTargetIndex;
03703     args->source += mySourceIndex;
03704     myConverterData->sourceIndex = 0;
03705     myConverterData->targetIndex = 0;
03706 }
03707 
03708 U_CFUNC void UConverter_fromUnicode_ISO_2022_CN_OFFSETS_LOGIC(UConverterFromUnicodeArgs* args, UErrorCode* err){
03709     
03710     UChar* mySource =(UChar*)args->source;
03711     UConverterDataISO2022 *myConverterData=(UConverterDataISO2022*)args->converter->extraInfo;
03712     UConverterCallbackReason reason;
03713     UBool isEscapeAppended = FALSE;
03714     StateEnumCN initIterState;
03715     unsigned char *myTarget = (unsigned char *) args->target; 
03716     const UChar *saveSource;
03717     uint32_t targetValue=0;
03718     char *saveTarget;
03719     int32_t *saveOffsets ;
03720     int32_t myTargetLength = args->targetLimit - args->target;
03721     int32_t mySourceLength = args->sourceLimit - args->source;
03722     int32_t mySourceIndex = 0;
03723     int32_t myTargetIndex = 0;
03724     int32_t length  =0;
03725     int plane = 0;
03726     CompactShortArray *myFromUnicodeDBCS = NULL;
03727     CompactShortArray *myFromUnicodeDBCSFallback = NULL;
03728     UChar32 targetUniChar = missingCharMarker;
03729     int32_t currentOffset=0;
03730     StateEnumCN currentState=ASCII;
03731     
03732     UChar32 mySourceChar = 0x0000;
03733     int iterCount = 0;
03734     const char *escSeq = NULL;
03735     UBool isShiftAppended = FALSE;
03736 
03737     isEscapeAppended =(UBool) myConverterData->isEscapeAppended;
03738     isShiftAppended =(UBool) myConverterData->isShiftAppended;
03739     initIterState = (StateEnumCN)myConverterData->fromUnicodeCurrentState;
03740     /* arguments check*/
03741     if ((args->converter == NULL) || (args->targetLimit < args->target) || (args->sourceLimit < args->source)){
03742         *err = U_ILLEGAL_ARGUMENT_ERROR;
03743         return;
03744     }
03745     if(args->converter->fromUSurrogateLead!=0 && myTargetIndex < myTargetLength) {
03746         goto getTrail;
03747     }
03748     while(mySourceIndex <  mySourceLength){
03749         currentState =(StateEnumCN) myConverterData->fromUnicodeCurrentState;
03750         myConverterData->fromUnicodeConverter = (myConverterData->fromUnicodeConverter == NULL) ?
03751             myConverterData->myConverterArray[0] :
03752         myConverterData->myConverterArray[(int)myConverterData->fromUnicodeCurrentState];
03753         
03754         if(myTargetIndex < myTargetLength){
03755             
03756             mySourceChar = (UChar) args->source[mySourceIndex++];
03757             
03758             /* I am handling surrogates in the begining itself so that I donot have to go through 4
03759              * iterations on codepages that we support. 
03760              */
03761             if(UTF_IS_SURROGATE(mySourceChar)) {
03762                 if(UTF_IS_SURROGATE_FIRST(mySourceChar)) {
03763                     args->converter->fromUSurrogateLead=(UChar)mySourceChar;
03764 getTrail:
03765                     /*look ahead to find the trail surrogate*/
03766                     if(mySourceIndex <  mySourceLength) {
03767                         /* test the following code unit */
03768                         UChar trail=(UChar) args->source[mySourceIndex];
03769                         if(UTF_IS_SECOND_SURROGATE(trail)) {
03770                             ++mySourceIndex;
03771                             mySourceChar=UTF16_GET_PAIR_VALUE(mySourceChar, trail);
03772                             args->converter->fromUSurrogateLead=0x00;
03773                             /* convert this surrogate code point */
03774                             /* exit this condition tree */
03775                         } else {
03776                             /* this is an unmatched lead code unit (1st surrogate) */
03777                             /* callback(illegal) */
03778                             reason=UCNV_ILLEGAL;
03779                             *err=U_ILLEGAL_CHAR_FOUND;
03780                             goto CALLBACK;
03781                         }
03782                     } else {
03783                         /* no more input */
03784                         break;
03785                     }
03786                 } else {
03787                     /* this is an unmatched trail code unit (2nd surrogate) */
03788                     /* callback(illegal) */
03789                     reason=UCNV_ILLEGAL;
03790                     *err=U_ILLEGAL_CHAR_FOUND;
03791                     goto CALLBACK;
03792                 }
03793             }
03794             /* if the source character is CR or LF then append the ASCII escape sequence*/
03795             if(mySourceChar== 0x000A || mySourceChar== 0x000D){
03796                 
03797                 if((int)currentState > 0 && isShiftAppended){
03798                     concatChar(args, &myTargetIndex, &myTargetLength, UCNV_SI,err,&mySourceIndex);
03799                     isShiftAppended=myConverterData->isShiftAppended=FALSE;
03800                     TEST_ERROR_CONDITION_CN(args,myTargetIndex, mySourceIndex,myConverterData, err);
03801                 }
03802                 myConverterData->isEscapeAppended=isEscapeAppended=FALSE;
03803                 targetUniChar = mySourceChar;
03804                 concatString(args, &myTargetIndex, &myTargetLength,&targetUniChar,err,&mySourceIndex);
03805                 TEST_ERROR_CONDITION_CN(args,myTargetIndex, mySourceIndex,myConverterData, err);
03806                 
03807                 continue;
03808             }
03809             else{
03810                 do{
03811                     switch (myConverterTypeCN[currentState]){
03812                                                                                        
03813                         case DBCS:
03814                             if(mySourceChar<0xffff){
03815                                 myFromUnicodeDBCS = &myConverterData->fromUnicodeConverter->sharedData->table->dbcs.fromUnicode;
03816                                 myFromUnicodeDBCSFallback = &myConverterData->fromUnicodeConverter->sharedData->table->dbcs.fromUnicodeFallback;
03817                                 targetUniChar = (UChar) ucmp16_getu (myFromUnicodeDBCS, mySourceChar);
03818                                 /* There are no fallbacks in ISO-IR-165 or GB_2312_1 so we can
03819                                  * safely ignore the codepaths below
03820                                  */
03821                                 /*if ((targetUniChar==missingCharMarker) && UCNV_FROM_U_USE_FALLBACK(args->converter, mySourceChar) &&
03822                                     (myConverterData->fromUnicodeConverter->sharedData->staticData->hasFromUnicodeFallback == TRUE)){
03823                                     targetUniChar = (UChar) ucmp16_getu (myFromUnicodeDBCSFallback, mySourceChar);
03824                                 } */
03825                             }
03826                             if(( myConverterData->version) == 0 && currentState==ISO_IR_165){
03827                                 targetUniChar=missingCharMarker;
03828                             }
03829                             break;
03830                     
03831                         case MBCS:
03832                     
03833                             length= _MBCSFromUChar32(myConverterData->fromUnicodeConverter->sharedData,
03834                                 mySourceChar,&targetValue,args->converter->useFallback);
03835                     
03836                             targetUniChar = (UChar32) targetValue;
03837                     
03838                             if(length==0){
03839                                 targetUniChar = missingCharMarker;
03840                             } 
03841                             else if(length==3){
03842                                 uint8_t planeVal = (uint8_t) ((targetValue)>>16);
03843                                 if(planeVal >0x80 && planeVal<0x89){
03844                                     plane = (int)(planeVal - 0x80);
03845                                     targetUniChar -= (planeVal<<16);
03846                                 }else 
03847                                     plane =-1;
03848                             }
03849                             else if(length >3){
03850                                 reason =UCNV_ILLEGAL;
03851                                 *err =U_INVALID_CHAR_FOUND;
03852                                 goto CALLBACK;
03853                             }
03854                             if(myConverterData->version == 0 && plane >2){
03855                                     targetUniChar = missingCharMarker;
03856                             }
03857                             break;
03858 
03859                         case ASCII1:
03860                             if(mySourceChar < 0x7f){
03861                                 targetUniChar = mySourceChar;
03862                             }
03863                             else 
03864                                 targetUniChar = missingCharMarker;
03865                             break;
03866 
03867                         case LATIN1:
03868                             /*not expected*/
03869                               break;
03870                         default:
03871                             /*not expected */ 
03872                             break;
03873                     }
03874                     if(targetUniChar==missingCharMarker){
03875                         iterCount = (iterCount<3)? iterCount+1 : 0;
03876                         myConverterData->fromUnicodeCurrentState=currentState=(StateEnumCN)(currentState<3)? currentState+1:0;
03877                         currentState =(StateEnumCN) myConverterData->fromUnicodeCurrentState;
03878                         myConverterData->fromUnicodeConverter = (myConverterData->fromUnicodeConverter == NULL) ?
03879                                                         myConverterData->myConverterArray[0] :
03880                                                     myConverterData->myConverterArray[(int)myConverterData->fromUnicodeCurrentState];
03881                         targetUniChar =missingCharMarker;
03882                         isEscapeAppended = FALSE; 
03883                         /* save the state */
03884                         myConverterData->isEscapeAppended = isEscapeAppended;
03885                         myConverterData->isShiftAppended =isShiftAppended;
03886                         myConverterData->sourceIndex = mySourceIndex;
03887                         myConverterData->targetIndex = myTargetIndex;
03888                     }
03889                 }while(targetUniChar==missingCharMarker && initIterState != currentState);
03890             }
03891             
03892             if(targetUniChar!= missingCharMarker){
03893                 
03894                 /* set the iteration state and iteration count  */
03895                 initIterState = currentState;
03896                 iterCount =0;
03897                 if(myConverterData->plane != plane){
03898                     isEscapeAppended=myConverterData->isEscapeAppended=FALSE;
03899                     myConverterData->plane = plane;
03900                 }
03901                 /* Append the escpace sequence */
03902                 if(!isEscapeAppended){
03903                     escSeq = (currentState==CNS_11643) ? escSeqCharsCN[(int)currentState+plane-1]:escSeqCharsCN[(int)currentState];
03904                     concatEscape(args, &myTargetIndex, &myTargetLength, 
03905                         escSeq,err,strlen(escSeq),&mySourceIndex);
03906                     isEscapeAppended=myConverterData->isEscapeAppended=TRUE;
03907                     
03908                     
03909                     
03910                 }
03911                 /* Append Shift Sequences */
03912                 if(currentState!=ASCII){
03913                     
03914                     if(currentState!=CNS_11643 ){
03915                         if(!isShiftAppended){
03916                             concatEscape(args,&myTargetIndex,&myTargetLength,
03917                                 shiftSeqCharsCN[currentState],err,
03918                                 strlen(shiftSeqCharsCN[currentState]),&mySourceIndex);
03919                             myConverterData->isShiftAppended =isShiftAppended=TRUE;
03920                         }
03921                        
03922                     }
03923                     else{
03924                         concatEscape(args,&myTargetIndex,&myTargetLength,shiftSeqCharsCN[currentState+plane],
03925                             err,strlen(shiftSeqCharsCN[currentState+plane]),&mySourceIndex);
03926                         
03927                         myConverterData->isShiftAppended =isShiftAppended=FALSE;
03928                     
03929                     }
03930                     
03931                 }
03932                 
03933                 concatString(args, &myTargetIndex, &myTargetLength,
03934                     &targetUniChar,err, &mySourceIndex);
03935                 TEST_ERROR_CONDITION_CN(args,myTargetIndex, mySourceIndex,myConverterData, err);
03936                 
03937             }/* end of end if(targetUniChar==missingCharMarker)*/
03938             else{
03939 
03940                
03941                 /* if we cannot find the character after checking all codepages 
03942                  * then this is an error
03943                  */
03944                     reason = UCNV_UNASSIGNED;
03945                     *err = U_INVALID_CHAR_FOUND;
03946                     
03947 CALLBACK:
03948                     args->converter->invalidUCharBuffer[args->converter->invalidUCharLength++]=(UChar)mySourceChar;
03949                     currentOffset = args->offsets[myTargetIndex-1]+1;
03950                     saveSource = args->source;
03951                     saveTarget = args->target;
03952                     saveOffsets = args->offsets;
03953                     args->target = (char*)myTarget + myTargetIndex;
03954                     args->source = mySource + mySourceIndex;
03955 
03956                     myConverterData->isEscapeAppended = isEscapeAppended;
03957                     myConverterData->isShiftAppended =isShiftAppended;
03958                     myConverterData->sourceIndex = mySourceIndex;
03959                     myConverterData->targetIndex = myTargetIndex;
03960 
03961                     args->offsets = args->offsets?args->offsets+myTargetIndex:0;
03962                     FromU_CALLBACK_OFFSETS_LOGIC_MACRO(args->converter->fromUContext,
03963                         args,
03964                         args->converter->invalidUCharBuffer,
03965                         args->converter->invalidUCharLength,
03966                         (UChar32) (args->converter->invalidUCharLength == 2 ? 
03967                         UTF16_GET_PAIR_VALUE(args->converter->invalidUCharBuffer[0], 
03968                         args->converter->invalidUCharBuffer[1]) 
03969                         : args->converter->invalidUCharBuffer[0]),
03970                         reason,
03971                         err);
03972                     args->converter->invalidUCharLength=0;
03973                     args->source=saveSource;
03974                     args->target=saveTarget;
03975                     args->offsets=saveOffsets;
03976                     initIterState = (StateEnumCN)myConverterData->fromUnicodeCurrentState;
03977                     myConverterData->isEscapeAppended=isEscapeAppended=FALSE;
03978                     args->converter->fromUSurrogateLead=0x00;
03979 
03980                     if (U_FAILURE (*err)){
03981                         break;
03982                     }
03983             }
03984             targetUniChar =missingCharMarker;
03985         } /* end if(myTargetIndex<myTargetLength) */
03986         else{
03987             *err =U_BUFFER_OVERFLOW_ERROR;
03988             break;
03989         }
03990         
03991     }/* end while(mySourceIndex<mySourceLength) */
03992     /*If at the end of conversion we are still carrying state information
03993     *flush is TRUE, we can deduce that the input stream is truncated
03994     */
03995     if (args->converter->fromUSurrogateLead !=0 && (mySourceIndex == mySourceLength) && args->flush){
03996         if (U_SUCCESS(*err)){
03997             *err = U_TRUNCATED_CHAR_FOUND;
03998             args->converter->toUnicodeStatus = 0x00;
03999         }
04000     }
04001     /* Reset the state of converter if we consumed 
04002      * the source and flush is true
04003      */
04004     if( (mySourceIndex == mySourceLength) && args->flush){
04005         setInitialStateFromUnicodeJPCN(myConverterData);
04006     }
04007 
04008     
04009     /*save the state and return */
04010     args->target += myTargetIndex;
04011     args->source += mySourceIndex;
04012     myConverterData->sourceIndex = 0;
04013     myConverterData->targetIndex = 0;
04014 }
04015 
04016 /*************** to unicode *******************/
04017 static StateEnum nextStateToUnicodeCN[2][MAX_STATES_2022]= {
04018 
04019     {
04020 /*      0                       1                    2                      3                   4               5                           6                          7                    8                   9    */
04021             
04022          INVALID_STATE          ,INVALID_STATE      ,INVALID_STATE      ,INVALID_STATE      ,INVALID_STATE      ,INVALID_STATE          ,INVALID_STATE          ,INVALID_STATE          ,INVALID_STATE          ,INVALID_STATE              
04023         ,INVALID_STATE          ,INVALID_STATE      ,INVALID_STATE      ,INVALID_STATE      ,INVALID_STATE      ,INVALID_STATE          ,INVALID_STATE          ,INVALID_STATE          ,INVALID_STATE          ,INVALID_STATE          
04024         ,INVALID_STATE          ,INVALID_STATE      ,INVALID_STATE      ,INVALID_STATE      ,INVALID_STATE      ,INVALID_STATE          ,INVALID_STATE          ,INVALID_STATE          ,INVALID_STATE          ,INVALID_STATE          
04025         ,INVALID_STATE          ,INVALID_STATE      ,INVALID_STATE      ,INVALID_STATE      ,INVALID_STATE      ,INVALID_STATE          ,INVALID_STATE          ,INVALID_STATE          ,INVALID_STATE          ,INVALID_STATE          
04026         ,INVALID_STATE          ,INVALID_STATE      ,INVALID_STATE      ,INVALID_STATE      ,INVALID_STATE      ,GB2312_1               ,INVALID_STATE          ,INVALID_STATE          ,CNS_11643              ,CNS_11643     
04027         ,INVALID_STATE          ,INVALID_STATE      ,INVALID_STATE      ,INVALID_STATE      ,INVALID_STATE      ,INVALID_STATE          ,INVALID_STATE          ,INVALID_STATE          ,INVALID_STATE          ,INVALID_STATE
04028         ,INVALID_STATE          ,INVALID_STATE      ,INVALID_STATE      ,INVALID_STATE      ,INVALID_STATE      ,INVALID_STATE          ,INVALID_STATE          ,INVALID_STATE          ,INVALID_STATE          ,INVALID_STATE              
04029         ,INVALID_STATE          ,INVALID_STATE
04030     },
04031     
04032     {
04033 /*      0                       1                    2                      3                   4               5                           6                          7                    8                   9    */
04034          INVALID_STATE          ,INVALID_STATE      ,INVALID_STATE      ,INVALID_STATE      ,INVALID_STATE      ,INVALID_STATE          ,INVALID_STATE          ,INVALID_STATE          ,INVALID_STATE          ,INVALID_STATE              
04035         ,INVALID_STATE          ,INVALID_STATE      ,INVALID_STATE      ,INVALID_STATE      ,INVALID_STATE      ,INVALID_STATE          ,INVALID_STATE          ,INVALID_STATE          ,INVALID_STATE          ,INVALID_STATE          
04036         ,INVALID_STATE          ,INVALID_STATE      ,INVALID_STATE      ,INVALID_STATE      ,INVALID_STATE      ,INVALID_STATE          ,INVALID_STATE          ,INVALID_STATE          ,INVALID_STATE          ,INVALID_STATE          
04037         ,INVALID_STATE          ,INVALID_STATE      ,INVALID_STATE      ,INVALID_STATE      ,INVALID_STATE      ,INVALID_STATE          ,INVALID_STATE          ,INVALID_STATE          ,INVALID_STATE          ,INVALID_STATE          
04038         ,INVALID_STATE          ,INVALID_STATE      ,INVALID_STATE      ,INVALID_STATE      ,INVALID_STATE      ,GB2312_1               ,INVALID_STATE          ,ISO_IR_165             ,CNS_11643              ,CNS_11643     
04039         ,CNS_11643              ,CNS_11643          ,CNS_11643          ,CNS_11643      ,   CNS_11643           ,INVALID_STATE          ,INVALID_STATE          ,INVALID_STATE          ,INVALID_STATE          ,INVALID_STATE
04040         ,INVALID_STATE          ,INVALID_STATE      ,INVALID_STATE      ,INVALID_STATE      ,INVALID_STATE      ,INVALID_STATE          ,INVALID_STATE          ,INVALID_STATE          ,INVALID_STATE          ,INVALID_STATE              
04041         ,INVALID_STATE          ,INVALID_STATE
04042         
04043     }
04044 };
04045 
04046 static void changeState_2022(UConverter* _this,
04047                                 const char** source, 
04048                                 const char* sourceLimit,
04049                                 UBool flush,Variant2022 var,
04050                                 int* plane,
04051                                 UErrorCode* err){
04052     UConverter* myUConverter;
04053     UCNV_TableStates_2022 value;
04054     UConverterDataISO2022* myData2022 = ((UConverterDataISO2022*)_this->extraInfo);
04055     uint32_t key = myData2022->key;
04056     const char* chosenConverterName = NULL;
04057 /*    const char* sourceStart =*source;*/
04058     char c;
04059     char cnvName[20];
04060     int32_t offset;
04061     
04062     /*In case we were in the process of consuming an escape sequence
04063     we need to reprocess it */
04064     
04065     do{
04066         
04067         value = getKey_2022(**source,(int32_t *) &key, &offset);
04068         switch (value){
04069             case VALID_NON_TERMINAL_2022 : 
04070                 break;
04071             
04072             case VALID_TERMINAL_2022:
04073                 {
04074                     (*source)++;
04075                     chosenConverterName = escSeqStateTable_Result_2022[offset];
04076                     key = 0;
04077                     goto DONE;
04078                 };
04079                 break;
04080             
04081             case INVALID_2022:
04082                 {
04083                     myData2022->key = 0;
04084                     *err = U_ILLEGAL_ESCAPE_SEQUENCE;
04085                     return;
04086                 }
04087             case VALID_SS2_SEQUENCE:
04088                 {
04089                     (*source)++;
04090                     key = 0;
04091                     goto DONE;
04092                 }
04093             case VALID_SS3_SEQUENCE:
04094                 {
04095                     (*source)++;
04096                     key = 0;
04097                     goto DONE;
04098                 }
04099             
04100             case VALID_MAYBE_TERMINAL_2022:
04101                 {
04102                     const char* mySource = (*source+1);
04103                     int32_t myKey = key;
04104                     UCNV_TableStates_2022 myValue = value;
04105                     int32_t myOffset=0;
04106                     if(*mySource==ESC_2022){
04107                         while ((mySource < sourceLimit) && 
04108                             ((myValue == VALID_MAYBE_TERMINAL_2022)||(myValue == VALID_NON_TERMINAL_2022))){
04109                             myValue = getKey_2022(*(mySource++), &myKey, &myOffset);
04110                         }
04111                     }
04112                     else{
04113                         (*source)++;
04114                         myValue=(UCNV_TableStates_2022) 1;
04115                         myOffset = 7;
04116                     }
04117                 
04118                     switch (myValue){
04119                         case INVALID_2022:
04120                             {
04121                                 /*Backs off*/
04122                                 chosenConverterName = escSeqStateTable_Result_2022[offset];
04123                                 value = VALID_TERMINAL_2022;
04124                                 goto DONE;
04125                             };
04126                             break;
04127                     
04128                         case VALID_TERMINAL_2022:
04129                             {
04130                                 /*uses longer escape sequence*/
04131                                 chosenConverterName = escSeqStateTable_Result_2022[myOffset];
04132                                 key = 0;
04133                                 value = VALID_TERMINAL_2022;
04134                                 goto DONE;
04135                             };
04136                             break;
04137                     
04138                         /* Not expected. Added to make the gcc happy */
04139                         case VALID_SS2_SEQUENCE:
04140                             {
04141                                 (*source)++;
04142                                 key = 0;
04143                                 goto DONE;
04144                             }
04145                         /* Not expected. Added to make the gcc happy */
04146                         case VALID_SS3_SEQUENCE:
04147                             {
04148                                 (*source)++;
04149                                 key = 0;
04150                                 goto DONE;
04151                             }
04152                     
04153                         case VALID_NON_TERMINAL_2022: 
04154                     
04155                         case VALID_MAYBE_TERMINAL_2022:
04156                             {
04157                                 if (flush){
04158                                     /*Backs off*/
04159                                     chosenConverterName = escSeqStateTable_Result_2022[offset];
04160                                     value = VALID_TERMINAL_2022;
04161                                     key = 0;
04162                                     goto DONE;
04163                                 }
04164                                 else{
04165                                     key = myKey;
04166                                     value = VALID_NON_TERMINAL_2022;
04167                                 }
04168                             };
04169                             break;
04170                     };
04171                     break;
04172                 };
04173                 break;
04174         }
04175     }while (++(*source) < sourceLimit);
04176     
04177 DONE:
04178     myData2022->key = key;
04179     if(chosenConverterName){
04180         if(uprv_strstr(chosenConverterName,"CNS")!=NULL){
04181             int i=0;
04182             while((c=*chosenConverterName)!=0 && c!=UCNV_OPTION_SEP_CHAR ) {
04183                 
04184                 cnvName[i++]=c;
04185                 ++chosenConverterName;
04186             }
04187             cnvName[i]=0;
04188             if(c==UCNV_OPTION_SEP_CHAR){
04189                 chosenConverterName++;
04190                 *plane = atoi(chosenConverterName);
04191             }
04192             
04193         }
04194         else{
04195             uprv_strcpy(cnvName,chosenConverterName);
04196             *plane=0;
04197         }
04198     }
04199     if ((value == VALID_NON_TERMINAL_2022) || (value == VALID_MAYBE_TERMINAL_2022)) {
04200         return;
04201     }
04202     if (value > 0 ) {
04203         if(value==3 || value==4 ){
04204             _this->mode = UCNV_SI;
04205             myUConverter =myData2022->currentConverter;
04206         }
04207         else{
04208             switch(var){
04209                 case ISO_2022:
04210                     _this->mode = UCNV_SI;
04211                     ucnv_close(myData2022->currentConverter);
04212                     myData2022->currentConverter = myUConverter = ucnv_open(chosenConverterName, err);
04213                     break;
04214                 case ISO_2022_JP:
04215                     {
04216                          StateEnum tempState=nextStateToUnicodeJP[myData2022->version][offset];
04217                         _this->mode = UCNV_SI;
04218                         myData2022->currentConverter = myUConverter = 
04219                             (tempState!=INVALID_STATE)? myData2022->myConverterArray[tempState]:NULL; 
04220                         myData2022->toUnicodeCurrentState = tempState;  
04221                         *err= (tempState==INVALID_STATE)?U_ILLEGAL_ESCAPE_SEQUENCE :U_ZERO_ERROR;
04222                     }
04223                     break;
04224                 case ISO_2022_CN:
04225                     {
04226                          StateEnumCN tempState=nextStateToUnicodeCN[myData2022->version][offset];
04227                         _this->mode = UCNV_SI;
04228                         myData2022->currentConverter = myUConverter = 
04229                             (tempState!=INVALID_STATE)? myData2022->myConverterArray[tempState]:NULL; 
04230                         myData2022->toUnicodeCurrentState =(StateEnum) tempState;
04231                         *err= (tempState==INVALID_STATE)?U_ILLEGAL_ESCAPE_SEQUENCE :U_ZERO_ERROR;
04232                     }
04233                     break;
04234                 default:
04235                     myUConverter=NULL;
04236                     *err = U_ILLEGAL_ESCAPE_SEQUENCE;
04237             }
04238             
04239         }
04240         if (U_SUCCESS(*err)){
04241             /*Customize the converter with the attributes set on the 2022 converter*/
04242             myUConverter->fromUCharErrorBehaviour = _this->fromUCharErrorBehaviour;
04243             myUConverter->fromUContext = _this->fromUContext;
04244             myUConverter->fromCharErrorBehaviour = _this->fromCharErrorBehaviour;
04245             myUConverter->toUContext = _this->toUContext;
04246             
04247             uprv_memcpy(myUConverter->subChar, 
04248                 _this->subChar,
04249                 myUConverter->subCharLen = _this->subCharLen);
04250             
04251             _this->mode = UCNV_SO;
04252         }
04253     }
04254     
04255     return;
04256 }  
04257 
04258 U_CFUNC void UConverter_toUnicode_ISO_2022_CN(UConverterToUnicodeArgs *args,
04259                                               UErrorCode* err){
04260     char tempBuf[3];
04261     int plane=0;
04262     const char* pBuf;
04263     const char *mySource = ( char *) args->source;
04264     UChar *myTarget = args->target;
04265     char *tempLimit = &tempBuf[2]+1; 
04266     int32_t mySourceIndex = 0;
04267     int32_t myTargetIndex = 0;
04268     const char *mySourceLimit = args->sourceLimit;
04269     UChar32 targetUniChar = 0x0000;
04270     UChar mySourceChar = 0x0000;
04271     UConverterDataISO2022* myData=(UConverterDataISO2022*)(args->converter->extraInfo);
04272     CompactShortArray *myToUnicodeDBCS=NULL, *myToUnicodeFallbackDBCS = NULL; 
04273     
04274     plane=myData->plane;
04275     /*Arguments Check*/
04276     if (U_FAILURE(*err)) 
04277         return;
04278     
04279     if ((args->converter == NULL) || (args->targetLimit < args->target) || (args->sourceLimit < args->source)){
04280         *err = U_ILLEGAL_ARGUMENT_ERROR;
04281         return;
04282     }
04283     
04284     while(mySource< args->sourceLimit){
04285         
04286         if(myTarget < args->targetLimit){
04287             
04288             mySourceChar= (unsigned char) *mySource++;
04289             
04290             if(args->converter->mode==UCNV_SI){
04291                     
04292                /* if there are no escape sequences in the first buffer then they
04293                 * are assumed to be ASCII according to RFC-1922
04294                 */
04295                     
04296                     myData->currentType = ASCII1;
04297                     myData->plane=plane = 0;
04298              }
04299             
04300             switch(mySourceChar){
04301                 case 0x0A:
04302                     if(args->converter->toUnicodeStatus != 0x00){
04303                         goto SAVE_STATE;
04304                     }
04305                     myData->currentType = ASCII1;
04306                     myData->plane=plane = 0;
04307                     break;
04308                 
04309                 case 0x0D:
04310                     if(args->converter->toUnicodeStatus != 0x00){
04311                         goto SAVE_STATE;
04312                     }
04313                     myData->currentType = ASCII1;
04314                     myData->plane=plane = 0;
04315                     break;
04316                 
04317                 case UCNV_SI:
04318                     if(args->converter->toUnicodeStatus != 0x00){
04319                         goto SAVE_STATE;
04320                     }
04321                     myData->currentType = ASCII1;
04322                     myData->plane=plane = 0;
04323                     continue;
04324                 
04325                 case UCNV_SO:
04326                     if(args->converter->toUnicodeStatus != 0x00){
04327                         goto SAVE_STATE;
04328                     }
04329                 
04330                     myData->currentType = (plane>0) ? MBCS: DBCS;
04331                     continue;
04332                             
04333                 default:
04334                     /* if we are in the middle of consuming an escape sequence 
04335                      * we continue to next switch tag else we break
04336                      */
04337                     if(myData->key==0){
04338                         break;
04339                     }
04340                 case ESC_2022:
04341                     if(args->converter->toUnicodeStatus != 0x00){
04342                         goto SAVE_STATE;
04343                     }
04344                     mySource--;
04345                     changeState_2022(args->converter,&(mySource), 
04346                         args->sourceLimit, args->flush,ISO_2022_CN,&plane,err);
04347                 
04348                     myData->plane=plane;
04349                     if(plane>0){
04350                         myData->currentType = MBCS;
04351                     }
04352                     else if(myData->currentConverter &&  
04353                                 uprv_stricmp("latin_1", 
04354                                 myData->currentConverter->sharedData->staticData->name)==0){
04355                     
04356                         myData->currentType=ASCII1;
04357                     }
04358                     if(U_FAILURE(*err)){
04359                             return;
04360                     }
04361                     continue;
04362             }
04363             
04364             switch(myData->currentType){
04365                 
04366                 case ASCII1:
04367                 
04368                     if(args->converter->fromUnicodeStatus == 0x00){
04369                         targetUniChar = (UChar) mySourceChar;
04370                     }
04371                     else{
04372                         goto SAVE_STATE;
04373                     }
04374                     break;
04375                 
04376                 case DBCS:
04377                     myToUnicodeDBCS = &myData->currentConverter->sharedData->table->dbcs.toUnicode;
04378                     myToUnicodeFallbackDBCS = &myData->currentConverter->sharedData->table->dbcs.toUnicodeFallback;
04379                 
04380                     if(args->converter->toUnicodeStatus == 0x00){
04381                         args->converter->toUnicodeStatus = (UChar) mySourceChar;
04382                         continue;
04383                     }
04384                     else{
04385                         tempBuf[0] = (char) args->converter->toUnicodeStatus ;
04386                         tempBuf[1] = (char) mySourceChar;
04387                         mySourceChar= (UChar)((args->converter->toUnicodeStatus << 8) | (mySourceChar & 0x00ff));
04388                         args->converter->toUnicodeStatus =0x00;
04389                     
04390                         targetUniChar = ucmp16_getu(myToUnicodeDBCS,mySourceChar); 
04391                         /* There are no fallbacks in ISO-IR-165 or GB_2312_1 so we can
04392                          * safely ignore the codepaths below
04393                          */
04394                         /*if(targetUniChar> 0xfffe){
04395                             if(UCNV_TO_U_USE_FALLBACK(args->converter) && 
04396                                 (myData->currentConverter->sharedData->staticData->hasFromUnicodeFallback == TRUE)){
04397                             
04398                                 targetUniChar = (UChar) ucmp16_getu(myToUnicodeFallbackDBCS, mySourceChar);
04399                             }
04400                         } */
04401                     }
04402                 
04403                     break;
04404                 
04405                 case MBCS:
04406                 
04407                     if(args->converter->toUnicodeStatus == 0x00){
04408                         args->converter->toUnicodeStatus = (UChar) mySourceChar;
04409                         continue;
04410                     }
04411                     else{
04412                         tempBuf[0] = (char)( 0x80+plane);
04413                         tempBuf[1] = (char) (args->converter->toUnicodeStatus);
04414                         tempBuf[2] = (char) (mySourceChar);
04415                         args->converter->toUnicodeStatus = 0;
04416                     
04417                         pBuf = &tempBuf[0];
04418                         tempLimit = &tempBuf[2]+1;
04419                         targetUniChar = _MBCSSimpleGetNextUChar(myData->currentConverter->sharedData,
04420                             &pBuf,tempLimit,args->converter->useFallback);
04421                     }
04422                     break;
04423                 /* for LATIN1 and SBCS which are not expected */
04424                 default:
04425                     break;
04426 
04427             }
04428             if(targetUniChar < 0xfffe){
04429                 *(myTarget++)=(UChar)targetUniChar;
04430             }
04431             else if(targetUniChar>=0xfffe){
04432 SAVE_STATE:
04433                 {
04434                     const char *saveSource = args->source;
04435                     UChar *saveTarget = args->target;
04436                     int32_t *saveOffsets = args->offsets;
04437                     UConverterCallbackReason reason;
04438                 
04439                     if(targetUniChar == 0xfffe){
04440                         reason = UCNV_UNASSIGNED;
04441                         *err = U_INVALID_CHAR_FOUND;
04442                     }
04443                     else{
04444                         reason = UCNV_ILLEGAL;
04445                         *err = U_ILLEGAL_CHAR_FOUND;
04446                     }
04447                     switch(myData->currentType){
04448                         case ASCII1:
04449                             args->converter->invalidCharBuffer[args->converter->invalidCharLength++] = (char)mySourceChar;
04450                             break;
04451 
04452                         case DBCS:
04453                             args->converter->invalidCharBuffer[args->converter->invalidCharLength++] = tempBuf[0];
04454                             args->converter->invalidCharBuffer[args->converter->invalidCharLength++] = tempBuf[1];
04455                             break;
04456 
04457                         case MBCS:
04458                             args->converter->invalidCharBuffer[args->converter->invalidCharLength++] = tempBuf[1];
04459                             args->converter->invalidCharBuffer[args->converter->invalidCharLength++] = tempBuf[2];
04460                     }
04461 
04462                     args->target = myTarget;
04463                     args->source = mySource;
04464                     ToU_CALLBACK_MACRO( args->converter->toUContext,
04465                         args,
04466                         args->converter->invalidCharBuffer,
04467                         args->converter->invalidCharLength,
04468                         reason,
04469                         err);
04470                     myTarget += args->target - myTarget;
04471                     args->source = saveSource;
04472                     args->target = saveTarget;
04473                     args->offsets = saveOffsets;
04474                     args->converter->invalidCharLength=0;
04475 
04476                     if(U_FAILURE(*err))
04477                         break;
04478 
04479                 }
04480             }
04481         }
04482         else{
04483             *err =U_BUFFER_OVERFLOW_ERROR;
04484             break;
04485         }
04486     }
04487     if((args->flush==TRUE)
04488         && (mySource == mySourceLimit) 
04489         && ( args->converter->toUnicodeStatus !=0x00)){
04490         if(U_SUCCESS(*err)){
04491             *err = U_TRUNCATED_CHAR_FOUND;
04492             args->converter->toUnicodeStatus = 0x00;
04493         }
04494     }
04495     
04496     /* Reset the state of converter if we consumed 
04497      * the source and flush is true
04498      */
04499     if( (mySource == mySourceLimit) && args->flush){
04500         setInitialStateToUnicodeJPCN(args->converter,myData);
04501     }
04502 
04503 
04504     args->target = myTarget;
04505     args->source = mySource;
04506 }
04507 
04508 U_CFUNC void UConverter_toUnicode_ISO_2022_CN_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,
04509                                                             UErrorCode* err){
04510     char tempBuf[3];
04511     int plane=0;
04512     const char* pBuf;
04513     const char *mySource = ( char *) args->source;
04514     UChar *myTarget = args->target;
04515     char *tempLimit = &tempBuf[3]; 
04516     int32_t mySourceIndex = 0;
04517     int32_t myTargetIndex = 0;
04518     const char *mySourceLimit = args->sourceLimit;
04519     UChar32 targetUniChar = 0x0000;
04520     UChar mySourceChar = 0x0000;
04521     UConverterDataISO2022* myData=(UConverterDataISO2022*)(args->converter->extraInfo);
04522     CompactShortArray *myToUnicodeDBCS=NULL, *myToUnicodeFallbackDBCS = NULL; 
04523     
04524     /*Arguments Check*/
04525     if (U_FAILURE(*err)) 
04526         return;
04527     
04528     if ((args->converter == NULL) || (args->targetLimit < args->target) || (args->sourceLimit < args->source)){
04529         *err = U_ILLEGAL_ARGUMENT_ERROR;
04530         return;
04531     }
04532     
04533     while(mySource< args->sourceLimit){
04534         
04535         if(myTarget < args->targetLimit){
04536             
04537             mySourceChar= (unsigned char) *mySource++;
04538             
04539             
04540             switch(mySourceChar){
04541             case 0x0A:
04542                 if(args->converter->toUnicodeStatus != 0x00){
04543                     goto SAVE_STATE;
04544                 }
04545                 myData->currentType = ASCII1;
04546                 myData->plane=plane = 0;
04547                 break;
04548                 
04549             case 0x0D:
04550                 if(args->converter->toUnicodeStatus != 0x00){
04551                     goto SAVE_STATE;
04552                 }
04553                 myData->currentType = ASCII1;
04554                 myData->plane=plane = 0;
04555                 break;
04556                 
04557             case UCNV_SI:
04558                 if(args->converter->toUnicodeStatus != 0x00){
04559                     goto SAVE_STATE;
04560                 }
04561                 myData->currentType = ASCII1;
04562                 myData->plane=plane = 0;
04563                 continue;
04564                 
04565             case UCNV_SO:
04566                 if(args->converter->toUnicodeStatus != 0x00){
04567                     goto SAVE_STATE;
04568                 }
04569                 
04570                 myData->currentType = (plane>0) ? MBCS: DBCS;
04571                 continue;
04572                 
04573             default:
04574                 /* if we are in the middle of consuming an escape sequence 
04575                  * we continue to next switch tag else we break
04576                  */
04577                 if(myData->key==0){
04578                     break;
04579                 }
04580             case ESC_2022:
04581                 if(args->converter->toUnicodeStatus != 0x00){
04582                     goto SAVE_STATE;
04583                 }
04584                 mySource--;
04585                 changeState_2022(args->converter,&(mySource), 
04586                     args->sourceLimit, args->flush,ISO_2022_CN,&plane,err);
04587                 
04588                 myData->plane=plane;
04589                 if(plane>0){
04590                     myData->currentType = MBCS;
04591                 }
04592                 else if(myData->currentConverter &&  
04593                             uprv_stricmp("latin_1", 
04594                             myData->currentConverter->sharedData->staticData->name)==0){
04595                     
04596                     myData->currentType=ASCII1;
04597                 }
04598                 if(U_FAILURE(*err)){
04599                     return;
04600                 }
04601                 continue;
04602             
04603             }
04604             
04605             switch(myData->currentType){
04606                 
04607             case ASCII1:
04608                 
04609                 if(args->converter->fromUnicodeStatus == 0x00){
04610                     targetUniChar = (UChar) mySourceChar;
04611                 }
04612                 else{
04613                     goto SAVE_STATE;
04614                 }
04615                 break;
04616                 
04617             case DBCS:
04618                 myToUnicodeDBCS = &myData->currentConverter->sharedData->table->dbcs.toUnicode;
04619                 myToUnicodeFallbackDBCS = &myData->currentConverter->sharedData->table->dbcs.toUnicodeFallback;
04620                 
04621                 if(args->converter->toUnicodeStatus == 0x00){
04622                     args->converter->toUnicodeStatus = (UChar) mySourceChar;
04623                     continue;
04624                 }
04625                 else{
04626                     tempBuf[0] = (char) args->converter->toUnicodeStatus ;
04627                     tempBuf[1] = (char) mySourceChar;
04628                     mySourceChar= (UChar)((args->converter->toUnicodeStatus << 8) | (mySourceChar & 0x00ff));
04629                     args->converter->toUnicodeStatus =0x00;
04630                     
04631                     targetUniChar = ucmp16_getu(myToUnicodeDBCS,mySourceChar);
04632                     /* There are no fallbacks in ISO-IR-165 or GB_2312_1 so we can
04633                      * safely ignore the codepaths below
04634                      */
04635                     /*
04636                     if(targetUniChar> 0xfffe){
04637                         if(UCNV_TO_U_USE_FALLBACK(args->converter) && 
04638                             (myData->currentConverter->sharedData->staticData->hasFromUnicodeFallback == TRUE)){
04639                             
04640                             targetUniChar = (UChar) ucmp16_getu(myToUnicodeFallbackDBCS, mySourceChar);
04641                         }
04642                     } */
04643                 }
04644                 
04645                 break;
04646                 
04647             case MBCS:
04648                 
04649                 if(args->converter->toUnicodeStatus == 0x00){
04650                     args->converter->toUnicodeStatus = (UChar) mySourceChar;
04651                     continue;
04652                 }
04653                 else{
04654                     tempBuf[0] = (char) (0x80+plane);
04655                     tempBuf[1] = (char) (args->converter->toUnicodeStatus);
04656                     tempBuf[2] = (char) (mySourceChar);
04657                     args->converter->toUnicodeStatus = 0x00;
04658                     pBuf = &tempBuf[0];
04659                     tempLimit = &tempBuf[2]+1;
04660                     targetUniChar = _MBCSSimpleGetNextUChar(myData->currentConverter->sharedData,
04661                         &pBuf,tempLimit,args->converter->useFallback);
04662                 }
04663                 break;
04664             /* for LATIN1 and SBCS which are not expected */    
04665             default:
04666                 break;
04667             }
04668             if(targetUniChar < 0xfffe){
04669                 if(myData->currentType == ASCII1){
04670                     args->offsets[myTarget - args->target]= mySource - args->source - 1;
04671                 }
04672                 else{
04673                     args->offsets[myTarget - args->target]= mySource - args->source - 2;
04674                 }
04675                 *(myTarget++)=(UChar)targetUniChar;
04676             }
04677             else if(targetUniChar>=0xfffe){
04678 SAVE_STATE:
04679                 {
04680                     const char *saveSource = args->source;
04681                     UChar *saveTarget = args->target;
04682                     int32_t *saveOffsets = args->offsets;
04683                     UConverterCallbackReason reason;
04684                     int32_t currentOffset ;
04685                     int32_t My_i = myTarget - args->target;
04686                     
04687                     
04688                     switch(myData->currentType){
04689                         case ASCII1:
04690                             currentOffset= mySource - args->source -1;
04691                             args->converter->invalidCharBuffer[args->converter->invalidCharLength++] = (char)mySourceChar;
04692                             break;
04693 
04694                         case DBCS:
04695                             currentOffset= mySource - args->source -2;
04696                             args->converter->invalidCharBuffer[args->converter->invalidCharLength++] = tempBuf[0];
04697                             args->converter->invalidCharBuffer[args->converter->invalidCharLength++] = tempBuf[1];
04698                             break;
04699 
04700                         case MBCS:
04701                             currentOffset= mySource - args->source -2;
04702                             args->converter->invalidCharBuffer[args->converter->invalidCharLength++] = tempBuf[1];
04703                             args->converter->invalidCharBuffer[args->converter->invalidCharLength++] = tempBuf[2];
04704                             break;
04705 
04706                         default:
04707                             currentOffset = mySource - args->source -1;
04708                     }
04709 
04710                     /*reason = (targetUniChar == 0xfffe) ? UCNV_UNASSIGNED:UCNV_ILLEGAL;
04711                     *err = (targetUniChar == 0xfffe) ? U_INVALID_CHAR_FOUND : U_ILLEGAL_CHAR_FOUND;*/
04712 
04713                     if(targetUniChar == 0xfffe){
04714                         reason = UCNV_UNASSIGNED;
04715                         *err = U_INVALID_CHAR_FOUND;
04716                     }
04717                     else{
04718                         reason = UCNV_ILLEGAL;
04719                         *err = U_ILLEGAL_CHAR_FOUND;
04720                     }
04721 
04722                     
04723                     args->offsets = args->offsets?args->offsets+(myTarget - args->target):0;
04724                     args->target =myTarget;
04725                     myTarget =saveTarget;
04726                     ToU_CALLBACK_OFFSETS_LOGIC_MACRO( args->converter->toUContext,
04727                         args,
04728                         args->converter->invalidCharBuffer,
04729                         args->converter->invalidCharLength,
04730                         reason,
04731                         err);
04732                     args->converter->invalidCharLength=0;
04733                     myTarget=args->target;
04734                     args->source  = saveSource;
04735                     args->target  = saveTarget;
04736                     args->offsets = saveOffsets;
04737 
04738                     if(U_FAILURE(*err))
04739                         break;
04740 
04741                 }
04742             }
04743         }
04744         else{
04745             *err =U_BUFFER_OVERFLOW_ERROR;
04746             break;
04747         }
04748     }
04749     if((args->flush==TRUE)
04750         && (mySource == mySourceLimit) 
04751         && ( args->converter->toUnicodeStatus !=0x00)){
04752         if(U_SUCCESS(*err)){
04753             *err = U_TRUNCATED_CHAR_FOUND;
04754             args->converter->toUnicodeStatus = 0x00;
04755         }
04756     }
04757     /* Reset the state of converter if we consumed 
04758      * the source and flush is true
04759      */
04760     if( (mySource == mySourceLimit) && args->flush){
04761         setInitialStateToUnicodeJPCN(args->converter,myData);
04762     }
04763 
04764 
04765     args->target = myTarget;
04766     args->source = mySource;
04767 }
04768 
04769 /*
04770 * This is a simple, interim implementation of GetNextUChar()
04771 * that allows to concentrate on testing one single implementation
04772 * of the ToUnicode conversion before it gets copied to
04773 * multiple version that are then optimized for their needs
04774 * (with vs. without offsets and getNextUChar).
04775 */
04776 
04777 U_CFUNC UChar32
04778 UConverter_getNextUChar_ISO_2022_CN(UConverterToUnicodeArgs *pArgs,
04779                                     UErrorCode *pErrorCode) {
04780     UChar buffer[UTF_MAX_CHAR_LENGTH];
04781     const char *realLimit=pArgs->sourceLimit;
04782     
04783     pArgs->target=buffer;
04784     pArgs->targetLimit=buffer+UTF_MAX_CHAR_LENGTH;
04785     
04786     while(pArgs->source<realLimit) {
04787         /* feed in one byte at a time to make sure to get only one character out */
04788         pArgs->sourceLimit=pArgs->source+1;
04789         pArgs->flush= (UBool)(pArgs->sourceLimit==realLimit);
04790         UConverter_toUnicode_ISO_2022_CN(pArgs, pErrorCode);
04791         if(U_FAILURE(*pErrorCode) && *pErrorCode!=U_BUFFER_OVERFLOW_ERROR) {
04792             return 0xffff;
04793         } else if(pArgs->target!=buffer) {
04794             if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
04795                 *pErrorCode=U_ZERO_ERROR;
04796             }
04797             return ucnv_getUChar32KeepOverflow(pArgs->converter, buffer, pArgs->target-buffer);
04798         }
04799     }
04800     
04801     /* no output because of empty input or only state changes and skipping callbacks */
04802     *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
04803     return 0xffff;
04804 }

Generated at Tue Dec 5 10:48:00 2000 for ICU by doxygen1.2.3 written by Dimitri van Heesch, © 1997-2000