00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033 #include "unicode/utypes.h"
00034 #include "unicode/ucnv.h"
00035 #include "unicode/ucnv_cb.h"
00036 #include "ucnv_bld.h"
00037 #include "ucnvmbcs.h"
00038 #include "ucnv_cnv.h"
00039 #include "cstring.h"
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057
00058
00059
00060
00061
00062
00063
00064
00065
00066
00067
00068
00069
00070
00071
00072
00073
00074
00075
00076
00077
00078
00079
00080
00081
00082
00083
00084
00085
00086
00087
00088
00089
00090
00091
00092
00093
00094
00095
00096
00097
00098
00099
00100
00101
00102
00103
00104
00105
00106
00107
00108
00109
00110
00111
00112
00113
00114
00115
00116
00117
00118
00119
00120
00121
00122
00123
00124
00125
00126
00127
00128
00129
00130
00131
00132
00133
00134
00135
00136
00137
00138
00139
00140
00141
00142
00143
00144
00145
00146
00147
00148
00149
00150
00151
00152
00153
00154
00155
00156
00157
00158
00159
00160
00161
00162
00163
00164
00165
00166
00167
00168
00169
00170
00171 /* Forward declarations for functions that are used in this file before (or without) a visible definition. */
00172 U_CFUNC void
00173 _MBCSSingleToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
00174 UErrorCode *pErrorCode);
00175 /* single-byte lookup that _MBCSSimpleGetNextUChar() delegates to when the table has one state */
00176 U_CFUNC UChar32
00177 _MBCSSingleSimpleGetNextUChar(UConverterSharedData *sharedData,
00178 uint8_t b, UBool useFallback);
00179 /* single-byte variant that _MBCSFromUnicodeWithOffsets() delegates to for MBCS_OUTPUT_1 */
00180 U_CFUNC void
00181 _MBCSSingleFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
00182 UErrorCode *pErrorCode);
00183 /* file-local helper called from the from-Unicode loops when a code point cannot be converted */
00184 static void
00185 fromUCallback(UConverter *cnv,
00186 void *context, UConverterFromUnicodeArgs *pArgs,
00187 const UChar *codeUnits, int32_t length, UChar32 codePoint,
00188 UConverterCallbackReason reason, UErrorCode *pErrorCode);
00189 /* file-local helper called from the to-Unicode loops when a byte sequence cannot be converted */
00190 static void
00191 toUCallback(UConverter *cnv,
00192 void *context, UConverterToUnicodeArgs *pArgs,
00193 const char *codeUnits, int32_t length,
00194 UConverterCallbackReason reason, UErrorCode *pErrorCode);
00195
00196
00197
00198
/*
 * Helpers for turning a four-byte sequence into a single "linear" number.
 *
 * LINEAR_18030(a, b, c, d) combines the four byte values as a mixed-radix
 * number with radixes 10, 126 and 10 for the second, third and fourth byte.
 * LINEAR_18030_BASE is the linear value of the sequence 81 30 81 30.
 * LINEAR(x) splits a 32-bit value into its four bytes and linearizes them.
 *
 * Every use of a macro argument is parenthesized so that an expression
 * argument (for example LINEAR(hi|lo)) is split into bytes as a whole.
 */
#define LINEAR_18030(a, b, c, d) ((((a)*10+(b))*126L+(c))*10L+(d))

#define LINEAR_18030_BASE LINEAR_18030(0x81, 0x30, 0x81, 0x30)

#define LINEAR(x) LINEAR_18030((x)>>24, ((x)>>16)&0xff, ((x)>>8)&0xff, (x)&0xff)

/*
 * Table of 13 ranges, four values per row:
 *   [0], [1]  first and last value of a range (these look like Unicode code
 *             points, e.g. 0x10000..0x10ffff) -- presumably the Unicode side
 *             of a GB 18030 range mapping; confirm against the code that
 *             reads cnv->extraInfo
 *   [2], [3]  linear values (see LINEAR) of the first and last four-byte
 *             sequence of the range
 *
 * _MBCSOpen() stores a pointer to this table in cnv->extraInfo for
 * converters whose name contains "gb18030" or "GB18030".
 */
static const uint32_t
gb18030Ranges[13][4]={
    {0x10000, 0x10ffff, LINEAR(0x90308130), LINEAR(0xe3329a35)},
    {0x9fa6, 0xdfff, LINEAR(0x82358f34), LINEAR(0x83389837)},
    {0x0452, 0x200f, LINEAR(0x8130d239), LINEAR(0x8136a530)},
    {0xe865, 0xf92b, LINEAR(0x83389838), LINEAR(0x8431cc32)},
    {0x2643, 0x2e80, LINEAR(0x8137a838), LINEAR(0x8138fd37)},
    {0xfa2a, 0xfe2f, LINEAR(0x8431e336), LINEAR(0x8432cc35)},
    {0x3ce1, 0x4055, LINEAR(0x8231d439), LINEAR(0x8232af33)},
    {0x361b, 0x3917, LINEAR(0x8230a634), LINEAR(0x8230f238)},
    {0x49b8, 0x4c76, LINEAR(0x8234a132), LINEAR(0x8234e734)},
    {0x4160, 0x4336, LINEAR(0x8232c938), LINEAR(0x8232f838)},
    {0x478e, 0x4946, LINEAR(0x8233e839), LINEAR(0x82349639)},
    {0x44d7, 0x464b, LINEAR(0x8233a430), LINEAR(0x8233c932)},
    {0xffe6, 0xffff, LINEAR(0x8432e932), LINEAR(0x8432eb37)}
};
00227
00228
00229
00230 U_CFUNC void
00231 _MBCSLoad(UConverterSharedData *sharedData,
00232 const uint8_t *raw,
00233 UErrorCode *pErrorCode) {
00234 UConverterMBCSTable *mbcsTable=&sharedData->table->mbcs;
00235 _MBCSHeader *header=(_MBCSHeader *)raw;
00236
00237 if(header->version[0]!=1) {
00238 *pErrorCode=U_INVALID_TABLE_FORMAT;
00239 return;
00240 }
00241
00242 mbcsTable->countStates=(uint8_t)header->countStates;
00243 mbcsTable->countToUFallbacks=header->countToUFallbacks;
00244 mbcsTable->stateTable=(const int32_t (*)[256])(raw+sizeof(_MBCSHeader));
00245 mbcsTable->toUFallbacks=(const _MBCSToUFallback *)(mbcsTable->stateTable+header->countStates);
00246 mbcsTable->unicodeCodeUnits=(const uint16_t *)(raw+header->offsetToUCodeUnits);
00247
00248 mbcsTable->fromUnicodeTable=(const uint16_t *)(raw+header->offsetFromUTable);
00249 mbcsTable->fromUnicodeBytes=(const uint8_t *)(raw+header->offsetFromUBytes);
00250 mbcsTable->outputType=(uint8_t)header->flags;
00251 }
00252
00253 U_CFUNC void
00254 _MBCSReset(UConverter *cnv) {
00255
00256 cnv->toUnicodeStatus=0;
00257 cnv->mode=0;
00258 cnv->toULength=0;
00259
00260
00261 cnv->fromUSurrogateLead=0;
00262 }
00263
00264 U_CFUNC void
00265 _MBCSOpen(UConverter *cnv,
00266 const char *name,
00267 const char *locale,
00268 uint32_t options,
00269 UErrorCode *pErrorCode) {
00270 _MBCSReset(cnv);
00271 if(uprv_strstr(name, "gb18030")!=NULL || uprv_strstr(name, "GB18030")!=NULL) {
00272
00273 cnv->extraInfo=(void *)gb18030Ranges;
00274 }
00275 }
00276
00277
00278
00279 static UChar32
00280 _MBCSGetFallback(UConverterMBCSTable *mbcsTable, uint32_t offset) {
00281 const _MBCSToUFallback *toUFallbacks;
00282 uint32_t i, start, limit;
00283
00284 limit=mbcsTable->countToUFallbacks;
00285 if(limit>0) {
00286
00287 toUFallbacks=mbcsTable->toUFallbacks;
00288 start=0;
00289 while(start<limit-1) {
00290 i=(start+limit)/2;
00291 if(offset<toUFallbacks[i].offset) {
00292 limit=i;
00293 } else {
00294 start=i;
00295 }
00296 }
00297
00298
00299 if(offset==toUFallbacks[start].offset) {
00300 return toUFallbacks[start].codePoint;
00301 }
00302 }
00303
00304 return 0xfffe;
00305 }
00306 /*
 * Convert codepage bytes to UTF-16 using the converter's state table.
 *
 * Reads bytes from pArgs->source up to pArgs->sourceLimit and writes UChars to
 * pArgs->target up to pArgs->targetLimit. If pArgs->offsets is not NULL, one
 * source index is written per output UChar. Tables with a single state are
 * handed to _MBCSSingleToUnicodeWithOffsets().
 *
 * State between calls is kept in cnv->toUnicodeStatus (offset), cnv->mode
 * (state), cnv->toULength and cnv->toUBytes (bytes of a partial character).
 *
 * *pErrorCode is set to
 *   U_BUFFER_OVERFLOW_ERROR   when the target is full,
 *   U_ILLEGAL_CHAR_FOUND / U_INVALID_CHAR_FOUND before toUCallback() is called
 *                             for an illegal / unassigned byte sequence,
 *   U_TRUNCATED_CHAR_FOUND    when pArgs->flush is set and the input ends inside
 *                             a character.
 * On return pArgs->source, pArgs->target and pArgs->offsets are updated.
 */
00307 U_CFUNC void
00308 _MBCSToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
00309 UErrorCode *pErrorCode) {
00310 /* working copies of the pArgs fields, written back before returning */
00311 UConverter *cnv;
00312 const uint8_t *source, *sourceLimit;
00313 UChar *target;
00314 const UChar *targetLimit;
00315 int32_t *offsets;
00316 /* lookup tables from the shared converter data */
00317 const int32_t (*stateTable)[256];
00318 const uint16_t *unicodeCodeUnits;
00319 /* conversion state that is loaded from and saved to cnv */
00320 uint32_t offset;
00321 uint8_t state;
00322 int8_t byteIndex;
00323 uint8_t *bytes;
00324 /* source indexes reported through the offsets array */
00325 int32_t sourceIndex, nextSourceIndex;
00326 /* scratch values for the current byte */
00327 int32_t entry;
00328 UChar c;
00329 uint8_t b;
00330 UConverterCallbackReason reason;
00331
00332 /* a table with a single state is handled by the single-byte implementation */
00333 cnv=pArgs->converter;
00334 if(cnv->sharedData->table->mbcs.countStates==1) {
00335 _MBCSSingleToUnicodeWithOffsets(pArgs, pErrorCode);
00336 return;
00337 }
00338
00339 /* copy the arguments into local variables */
00340 source=(const uint8_t *)pArgs->source;
00341 sourceLimit=(const uint8_t *)pArgs->sourceLimit;
00342 target=pArgs->target;
00343 targetLimit=pArgs->targetLimit;
00344 offsets=pArgs->offsets;
00345
00346 stateTable=cnv->sharedData->table->mbcs.stateTable;
00347 unicodeCodeUnits=cnv->sharedData->table->mbcs.unicodeCodeUnits;
00348
00349 /* resume with the state saved by a previous call */
00350 offset=cnv->toUnicodeStatus;
00351 state=(uint8_t)(cnv->mode);
00352 byteIndex=cnv->toULength;
00353 bytes=cnv->toUBytes;
00354
00355 /* sourceIndex is -1 while bytes of a partial character are carried over in cnv->toULength */
00356 sourceIndex=byteIndex==0 ? 0 : -1;
00357 nextSourceIndex=0;
00358
00359 /* main loop: one source byte per iteration, only while there is room for at least one UChar */
00360 while(source<sourceLimit) {
00361
00362
00363
00364
00365
00366
00367
00368
00369 if(target<targetLimit) {
00370 bytes[byteIndex++]=b=*source++;
00371 ++nextSourceIndex;
00372 entry=stateTable[state][b];
00373 if(entry>=0) {
00374 /*
00375 * non-negative entry: not a final entry;
00376 * bits 6..0 are the next state,
00377 * the bits above are added to offset
00378 */
00379 state=(uint8_t)(entry&0x7f);
00380 offset+=entry>>7;
00381 } else {
00382 /*
00383 * negative entry: a final entry that ends a byte sequence;
00384 * bits 6..0 are still the next state
00385 */
00386
00387
00388
00389
00390
00391 state=(uint8_t)(entry&0x7f);
00392
00393 /* bits 31..27 select the action; bit 31 is always set here, hence the 16| in every case label */
00394 switch((uint32_t)entry>>27U) {
00395 case 16|MBCS_STATE_ILLEGAL:
00396 /* report the accumulated bytes as illegal */
00397
00398 reason=UCNV_ILLEGAL;
00399 *pErrorCode=U_ILLEGAL_CHAR_FOUND;
00400 goto callback;
00401 case 16|MBCS_STATE_CHANGE_ONLY:
00402 /* only the state changes; nothing is written */
00403
00404
00405
00406
00407
00408
00409
00410 break;
00411 case 16|MBCS_STATE_UNASSIGNED:
00412 /* report the accumulated bytes as unassigned */
00413
00414 reason=UCNV_UNASSIGNED;
00415 *pErrorCode=U_INVALID_CHAR_FOUND;
00416 goto callback;
00417 case 16|MBCS_STATE_FALLBACK_DIRECT_16:
00418 /* treated like VALID_DIRECT_16 if fallbacks are enabled, otherwise as unassigned */
00419
00420 if(!UCNV_TO_U_USE_FALLBACK(cnv)) {
00421
00422 reason=UCNV_UNASSIGNED;
00423 *pErrorCode=U_INVALID_CHAR_FOUND;
00424 goto callback;
00425 }
00426 /* fall through */
00427 case 16|MBCS_STATE_VALID_DIRECT_16:
00428 /* the entry shifted right by 7 and truncated to UChar is the output code unit */
00429
00430
00431 *target++=(UChar)(entry>>7);
00432 if(offsets!=NULL) {
00433 *offsets++=sourceIndex;
00434 }
00435 break;
00436 case 16|MBCS_STATE_FALLBACK_DIRECT_20:
00437 /* treated like VALID_DIRECT_20 if fallbacks are enabled, otherwise as unassigned */
00438 if(!UCNV_TO_U_USE_FALLBACK(cnv)) {
00439
00440 reason=UCNV_UNASSIGNED;
00441 *pErrorCode=U_INVALID_CHAR_FOUND;
00442 goto callback;
00443 }
00444 /* fall through */
00445 case 16|MBCS_STATE_VALID_DIRECT_20:
00446 /* bits 26..7 hold a 20-bit value: upper 10 bits go into the lead surrogate, lower 10 into the trail */
00447 entry=(entry>>7)&0xfffff;
00448
00449 *target++=(UChar)(0xd800|(UChar)(entry>>10));
00450 if(offsets!=NULL) {
00451 *offsets++=sourceIndex;
00452 }
00453 c=(UChar)(0xdc00|(UChar)(entry&0x3ff));
00454 if(target<targetLimit) {
00455 *target++=c;
00456 if(offsets!=NULL) {
00457 *offsets++=sourceIndex;
00458 }
00459 } else {
00460 /* no room for the trail surrogate: keep it in the converter's overflow buffer */
00461 cnv->UCharErrorBuffer[0]=c;
00462 cnv->UCharErrorBufferLength=1;
00463 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
00464 /* the byte sequence is complete, so clear the per-character state before leaving */
00465 offset=0;
00466 byteIndex=0;
00467 goto endloop;
00468 }
00469 break;
00470 case 16|MBCS_STATE_VALID_16:
00471 /* bits 15..7 of the entry are the last offset delta; the result is unicodeCodeUnits[offset] */
00472
00473 offset+=(uint16_t)entry>>7;
00474 c=unicodeCodeUnits[offset];
00475 if(c<0xfffe) {
00476 /* ordinary code unit */
00477 *target++=c;
00478 if(offsets!=NULL) {
00479 *offsets++=sourceIndex;
00480 }
00481 } else if(c==0xfffe) {
00482 if(UCNV_TO_U_USE_FALLBACK(cnv) && (entry=(int32_t)_MBCSGetFallback(&cnv->sharedData->table->mbcs, offset))!=0xfffe) {
00483 goto output32;
00484 }
00485 /* 0xfffe without a usable fallback: unassigned */
00486 reason=UCNV_UNASSIGNED;
00487 *pErrorCode=U_INVALID_CHAR_FOUND;
00488 goto callback;
00489 } else {
00490 /* any larger value (0xffff for a 16-bit UChar): illegal */
00491 reason=UCNV_ILLEGAL;
00492 *pErrorCode=U_ILLEGAL_CHAR_FOUND;
00493 goto callback;
00494 }
00495 break;
00496 case 16|MBCS_STATE_VALID_16_PAIR:
00497 /* like VALID_16, but a lead surrogate is followed by its trail surrogate in the next table slot */
00498
00499 offset+=(uint16_t)entry>>7;
00500 c=unicodeCodeUnits[offset++];
00501 if(UTF_IS_FIRST_SURROGATE(c)) {
00502 *target++=c;
00503 if(offsets!=NULL) {
00504 *offsets++=sourceIndex;
00505 }
00506 if(target<targetLimit) {
00507 *target++=unicodeCodeUnits[offset];
00508 if(offsets!=NULL) {
00509 *offsets++=sourceIndex;
00510 }
00511 } else {
00512 /* no room for the trail surrogate: keep it in the converter's overflow buffer */
00513 cnv->UCharErrorBuffer[0]=unicodeCodeUnits[offset];
00514 cnv->UCharErrorBufferLength=1;
00515 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
00516
00517 offset=0;
00518 byteIndex=0;
00519 goto endloop;
00520 }
00521 } else if(c<0xfffe) {
00522 /* single code unit */
00523 *target++=c;
00524 if(offsets!=NULL) {
00525 *offsets++=sourceIndex;
00526 }
00527 } else if(c==0xfffe) {
00528 /*
00529 * offset was already incremented when c was read,
00530 * so the fallback lookup uses offset-1
00531 */
00532 if(UCNV_TO_U_USE_FALLBACK(cnv) && (entry=(int32_t)_MBCSGetFallback(&cnv->sharedData->table->mbcs, offset-1))!=0xfffe) {
00533 goto output32;
00534 }
00535
00536 reason=UCNV_UNASSIGNED;
00537 *pErrorCode=U_INVALID_CHAR_FOUND;
00538 goto callback;
00539 } else {
00540 /* any larger value: illegal */
00541 reason=UCNV_ILLEGAL;
00542 *pErrorCode=U_ILLEGAL_CHAR_FOUND;
00543 goto callback;
00544 }
00545 break;
00546 default:
00547 /* any other action value writes nothing */
00548
00549 break;
00550 }
00551
00552 /* normal end of a byte sequence: clear offset and byteIndex (state was already set from the entry) */
00553 offset=0;
00554 byteIndex=0;
00555 sourceIndex=nextSourceIndex;
00556 continue;
00557
00558
00559
00560
00561
00562
00563
00564
00565
00566
00567
00568
00569
00570
00571
00572
00573
00574
00575
00576
00577
00578
00579
00580
00581
00582
00583
00584
00585
00586
00587 /* reached only via goto: write the code point in entry that came from the fallback list */
00588 output32:
00589
00590 if(entry<=0xffff) {
00591 /* one code unit */
00592 *target++=(UChar)entry;
00593 if(offsets!=NULL) {
00594 *offsets++=sourceIndex;
00595 }
00596 } else {
00597 /* lead surrogate: 0xd7c0+(entry>>10) equals 0xd800+((entry-0x10000)>>10) */
00598 *target++=(UChar)(0xd7c0+(entry>>10));
00599 if(offsets!=NULL) {
00600 *offsets++=sourceIndex;
00601 }
00602 c=(UChar)(0xdc00|(UChar)(entry&0x3ff));
00603 if(target<targetLimit) {
00604 *target++=c;
00605 if(offsets!=NULL) {
00606 *offsets++=sourceIndex;
00607 }
00608 } else {
00609 /* no room for the trail surrogate: keep it in the overflow buffer and leave the while loop */
00610 cnv->UCharErrorBuffer[0]=c;
00611 cnv->UCharErrorBufferLength=1;
00612 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
00613
00614 offset=0;
00615 byteIndex=0;
00616 break;
00617 }
00618 }
00619
00620 /* clear the per-character state and continue with the next byte */
00621 offset=0;
00622 byteIndex=0;
00623 sourceIndex=nextSourceIndex;
00624 continue;
00625
00626 callback:
00627 /* reached only via goto with reason and *pErrorCode already set */
00628 /* publish the current pointers so that the callback sees them */
00629 pArgs->source=(const char *)source;
00630 pArgs->target=target;
00631 pArgs->offsets=offsets;
00632
00633 /* copy the offending bytes into cnv->invalidCharBuffer */
00634 for(b=0; b<(uint8_t)byteIndex; ++b) {
00635 cnv->invalidCharBuffer[b]=(char)bytes[b];
00636 }
00637 cnv->invalidCharLength=byteIndex;
00638
00639 /* store a clean per-character state (and the current state) in cnv before calling out */
00640 cnv->toUnicodeStatus=0;
00641 cnv->mode=state;
00642 cnv->toULength=0;
00643
00644 /* call the callback function */
00645 toUCallback(cnv, cnv->toUContext, pArgs, (const char *)bytes, byteIndex, reason, pErrorCode);
00646
00647 /* reload the state, which the callback may have changed through cnv */
00648 offset=cnv->toUnicodeStatus;
00649 state=(uint8_t)cnv->mode;
00650 byteIndex=cnv->toULength;
00651
00652 /* pass the number of UChars the callback wrote to ucnv_updateCallbackOffsets() (helper not visible here; presumably it fills offsets for them) */
00653 offsets=ucnv_updateCallbackOffsets(offsets, pArgs->target-target, sourceIndex);
00654 target=pArgs->target;
00655
00656 /* account for any change the callback made to pArgs->source */
00657 sourceIndex=nextSourceIndex+((const uint8_t *)pArgs->source-source);
00658 source=(const uint8_t *)pArgs->source;
00659
00660 /*
00661 * decide whether to go on: stop on overflow, on pending overflow output,
00662 * or on any other error that is still set
00663 */
00664 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
00665 break;
00666 } else if(cnv->UCharErrorBufferLength>0) {
00667 /* UChars are waiting in the overflow buffer */
00668 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
00669 break;
00670 } else if(U_FAILURE(*pErrorCode)) {
00671 /* the error was not cleared: drop the conversion state */
00672 offset=0;
00673 state=0;
00674 byteIndex=0;
00675 break;
00676 }
00677
00678 /* the callback cleared the error: continue with the next byte */
00679
00680
00681
00682
00683 }
00684 } else {
00685 /* the target is full */
00686 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
00687 break;
00688 }
00689 }
00690 endloop:
00691
00692 if(pArgs->flush && source>=sourceLimit) {
00693 /* end of input with flush: a partial character is an error, and the state is reset */
00694 if(byteIndex>0 && U_SUCCESS(*pErrorCode)) {
00695
00696 *pErrorCode=U_TRUNCATED_CHAR_FOUND;
00697 }
00698 cnv->toUnicodeStatus=0;
00699 cnv->mode=0;
00700 cnv->toULength=0;
00701 } else {
00702 /* save the state for the next call */
00703 cnv->toUnicodeStatus=offset;
00704 cnv->mode=state;
00705 cnv->toULength=byteIndex;
00706 }
00707
00708 /* write back the updated pointers */
00709 pArgs->source=(const char *)source;
00710 pArgs->target=target;
00711 pArgs->offsets=offsets;
00712 }
00713
00714 U_CFUNC void
00715 _MBCSToUnicode(UConverterToUnicodeArgs *pArgs,
00716 UErrorCode *pErrorCode) {
00717 _MBCSToUnicodeWithOffsets(pArgs, pErrorCode);
00718 }
00719
00720 /*
 * Single-byte variant of the to-Unicode conversion: every source byte is
 * looked up in row 0 of the state table and is a complete character, so no
 * state is carried between bytes or between calls.
 *
 * Reads bytes from pArgs->source up to pArgs->sourceLimit, writes UChars to
 * pArgs->target up to pArgs->targetLimit and, if pArgs->offsets is not NULL,
 * one source index per output UChar. *pErrorCode is set to
 * U_BUFFER_OVERFLOW_ERROR when the target is full, and to
 * U_ILLEGAL_CHAR_FOUND / U_INVALID_CHAR_FOUND before toUCallback() is called.
 * On return pArgs->source, pArgs->target and pArgs->offsets are updated.
 */
00721 U_CFUNC void
00722 _MBCSSingleToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
00723 UErrorCode *pErrorCode) {
00724 /* working copies of the pArgs fields, written back before returning */
00725 UConverter *cnv;
00726 const uint8_t *source, *sourceLimit;
00727 UChar *target;
00728 const UChar *targetLimit;
00729 int32_t *offsets;
00730
00731 const int32_t (*stateTable)[256];
00732 /* source indexes reported through the offsets array */
00733 int32_t sourceIndex, nextSourceIndex;
00734 /* scratch values for the current byte */
00735 int32_t entry;
00736 UChar c;
00737 uint8_t b;
00738 UConverterCallbackReason reason;
00739
00740 /* copy the arguments into local variables */
00741 cnv=pArgs->converter;
00742 source=(const uint8_t *)pArgs->source;
00743 sourceLimit=(const uint8_t *)pArgs->sourceLimit;
00744 target=pArgs->target;
00745 targetLimit=pArgs->targetLimit;
00746 offsets=pArgs->offsets;
00747
00748 stateTable=cnv->sharedData->table->mbcs.stateTable;
00749
00750 /* sourceIndex is the index of the byte being converted, nextSourceIndex the one after it */
00751 sourceIndex=0;
00752 nextSourceIndex=0;
00753
00754 /* main loop: one source byte per iteration, only while there is room for at least one UChar */
00755 while(source<sourceLimit) {
00756
00757
00758
00759
00760
00761
00762
00763
00764 if(target<targetLimit) {
00765 b=*source++;
00766 ++nextSourceIndex;
00767 entry=stateTable[0][b];
00768
00769 /*
00770 * bits 31..27 of the entry select the action; the case labels all
00771 * have 16 ORed in, i.e. they expect bit 31 to be set
00772 */
00773
00774
00775
00776
00777
00778 switch((uint32_t)entry>>27U) {
00779 case 16|MBCS_STATE_ILLEGAL:
00780 /* report the byte as illegal */
00781
00782 reason=UCNV_ILLEGAL;
00783 *pErrorCode=U_ILLEGAL_CHAR_FOUND;
00784 goto callback;
00785 case 16|MBCS_STATE_UNASSIGNED:
00786 /* report the byte as unassigned */
00787
00788 reason=UCNV_UNASSIGNED;
00789 *pErrorCode=U_INVALID_CHAR_FOUND;
00790 goto callback;
00791 case 16|MBCS_STATE_FALLBACK_DIRECT_16:
00792 /* treated like VALID_DIRECT_16 if fallbacks are enabled, otherwise as unassigned */
00793
00794 if(!UCNV_TO_U_USE_FALLBACK(cnv)) {
00795
00796 reason=UCNV_UNASSIGNED;
00797 *pErrorCode=U_INVALID_CHAR_FOUND;
00798 goto callback;
00799 }
00800 /* fall through */
00801 case 16|MBCS_STATE_VALID_DIRECT_16:
00802 /* the entry shifted right by 7 and truncated to UChar is the output code unit */
00803
00804
00805 *target++=(UChar)(entry>>7);
00806 if(offsets!=NULL) {
00807 *offsets++=sourceIndex;
00808 }
00809 break;
00810 case 16|MBCS_STATE_FALLBACK_DIRECT_20:
00811 /* treated like VALID_DIRECT_20 if fallbacks are enabled, otherwise as unassigned */
00812 if(!UCNV_TO_U_USE_FALLBACK(cnv)) {
00813
00814 reason=UCNV_UNASSIGNED;
00815 *pErrorCode=U_INVALID_CHAR_FOUND;
00816 goto callback;
00817 }
00818 /* fall through */
00819 case 16|MBCS_STATE_VALID_DIRECT_20:
00820 /* bits 26..7 hold a 20-bit value: upper 10 bits go into the lead surrogate, lower 10 into the trail */
00821 entry=(entry>>7)&0xfffff;
00822
00823 *target++=(UChar)(0xd800|(UChar)(entry>>10));
00824 if(offsets!=NULL) {
00825 *offsets++=sourceIndex;
00826 }
00827 c=(UChar)(0xdc00|(UChar)(entry&0x3ff));
00828 if(target<targetLimit) {
00829 *target++=c;
00830 if(offsets!=NULL) {
00831 *offsets++=sourceIndex;
00832 }
00833 } else {
00834 /* no room for the trail surrogate: keep it in the converter's overflow buffer */
00835 cnv->UCharErrorBuffer[0]=c;
00836 cnv->UCharErrorBufferLength=1;
00837 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
00838 goto endloop;
00839 }
00840 break;
00841 default:
00842 /* any other action value writes nothing */
00843
00844 break;
00845 }
00846
00847 /* normal end of a character */
00848 sourceIndex=nextSourceIndex;
00849 continue;
00850
00851 callback:
00852 /* reached only via goto with reason and *pErrorCode already set */
00853 /* publish the current pointers so that the callback sees them */
00854 pArgs->source=(const char *)source;
00855 pArgs->target=target;
00856 pArgs->offsets=offsets;
00857
00858 /* record the offending byte in the converter */
00859 cnv->invalidCharBuffer[0]=b;
00860 cnv->invalidCharLength=1;
00861
00862 /* call the callback function */
00863 toUCallback(cnv, cnv->toUContext, pArgs, (const char *)&b, 1, reason, pErrorCode);
00864
00865 /* pass the number of UChars the callback wrote to ucnv_updateCallbackOffsets() (helper not visible here; presumably it fills offsets for them) */
00866 offsets=ucnv_updateCallbackOffsets(offsets, pArgs->target-target, sourceIndex);
00867 target=pArgs->target;
00868
00869 /* account for any change the callback made to pArgs->source */
00870 sourceIndex=nextSourceIndex+((const uint8_t *)pArgs->source-source);
00871 source=(const uint8_t *)pArgs->source;
00872
00873 /*
00874 * decide whether to go on: stop on overflow, on pending overflow output,
00875 * or on any other error that is still set
00876 */
00877 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
00878 break;
00879 } else if(cnv->UCharErrorBufferLength>0) {
00880 /* UChars are waiting in the overflow buffer */
00881 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
00882 break;
00883 } else if(U_FAILURE(*pErrorCode)) {
00884 /* the error was not cleared by the callback */
00885 break;
00886 }
00887
00888 /* the callback cleared the error: continue with the next byte */
00889
00890
00891
00892
00893 } else {
00894 /* the target is full */
00895 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
00896 break;
00897 }
00898 }
00899 endloop:
00900
00901 /* write back the updated pointers */
00902 pArgs->source=(const char *)source;
00903 pArgs->target=target;
00904 pArgs->offsets=offsets;
00905 }
00906
00907
00908
00909
00910
00911
00912
00913
00914
00915 U_CFUNC UChar32
00916 _MBCSGetNextUChar(UConverterToUnicodeArgs *pArgs,
00917 UErrorCode *pErrorCode) {
00918 UChar buffer[UTF_MAX_CHAR_LENGTH];
00919 const char *realLimit=pArgs->sourceLimit;
00920
00921 pArgs->target=buffer;
00922 pArgs->targetLimit=buffer+UTF_MAX_CHAR_LENGTH;
00923
00924 while(pArgs->source<realLimit) {
00925
00926 pArgs->sourceLimit=pArgs->source+1;
00927 pArgs->flush= (UBool)(pArgs->sourceLimit==realLimit);
00928 _MBCSToUnicode(pArgs, pErrorCode);
00929 if(U_FAILURE(*pErrorCode) && *pErrorCode!=U_BUFFER_OVERFLOW_ERROR) {
00930 return 0xffff;
00931 } else {
00932 int32_t length=pArgs->target-buffer;
00933 #if 0
00934
00935
00936
00937
00938
00939
00940
00941
00942
00943
00944
00945
00946
00947
00948
00949
00950
00951
00952
00953
00954
00955
00956
00957
00958
00959 if(
00960 length>0 &&
00961 (pArgs->flush || !UTF_IS_FIRST_SURROGATE(buffer[0]) || length==2)
00962 #endif
00963 if(length>0) {
00964 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
00965 *pErrorCode=U_ZERO_ERROR;
00966 }
00967 return ucnv_getUChar32KeepOverflow(pArgs->converter, buffer, length);
00968 }
00969 }
00970 }
00971
00972
00973 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
00974 return 0xffff;
00975 }
00976
00977
00978
00979
00980
00981
00982
00983
00984
00985
00986
00987 U_CFUNC UChar32
00988 _MBCSSimpleGetNextUChar(UConverterSharedData *sharedData,
00989 const char **pSource, const char *sourceLimit,
00990 UBool useFallback) {
00991 const uint8_t *source;
00992
00993 const int32_t (*stateTable)[256];
00994 const uint16_t *unicodeCodeUnits;
00995
00996 uint32_t offset;
00997 uint8_t state;
00998
00999 int32_t entry;
01000
01001
01002 source=(const uint8_t *)*pSource;
01003 if(source>=(const uint8_t *)sourceLimit) {
01004
01005 return 0xfffe;
01006 }
01007
01008
01009 if(sharedData->table->mbcs.countStates==1) {
01010 return _MBCSSingleSimpleGetNextUChar(sharedData, (uint8_t)(*(*pSource)++), useFallback);
01011 }
01012
01013 stateTable=sharedData->table->mbcs.stateTable;
01014 unicodeCodeUnits=sharedData->table->mbcs.unicodeCodeUnits;
01015
01016
01017 offset=0;
01018 state=0;
01019
01020
01021 do {
01022 entry=stateTable[state][*source++];
01023 if(entry>=0) {
01024
01025
01026
01027
01028
01029 state=(uint8_t)(entry&0x7f);
01030 offset+=entry>>7;
01031 } else {
01032
01033
01034
01035
01036
01037
01038
01039
01040 *pSource=(const char *)source;
01041
01042
01043 switch((uint32_t)entry>>27U) {
01044 case 16|MBCS_STATE_ILLEGAL:
01045
01046 return 0xffff;
01047 case 16|MBCS_STATE_CHANGE_ONLY:
01048
01049
01050
01051
01052
01053
01054
01055
01056 if(source==(const uint8_t *)sourceLimit) {
01057
01058 return 0xfffe;
01059 }
01060 break;
01061 case 16|MBCS_STATE_UNASSIGNED:
01062
01063 return 0xfffe;
01064 case 16|MBCS_STATE_FALLBACK_DIRECT_16:
01065
01066
01067 if(!TO_U_USE_FALLBACK(useFallback)) {
01068 return 0xfffe;
01069 }
01070
01071 case 16|MBCS_STATE_VALID_DIRECT_16:
01072
01073
01074
01075 return (UChar)(entry>>7);
01076 case 16|MBCS_STATE_FALLBACK_DIRECT_20:
01077
01078 if(!TO_U_USE_FALLBACK(useFallback)) {
01079 return 0xfffe;
01080 }
01081
01082 case 16|MBCS_STATE_VALID_DIRECT_20:
01083
01084 return 0x10000+((entry>>7)&0xfffff);
01085 case 16|MBCS_STATE_VALID_16:
01086
01087
01088 offset+=(uint16_t)entry>>7;
01089 entry=unicodeCodeUnits[offset];
01090 if(entry!=0xfffe) {
01091 return (UChar32)entry;
01092 } else {
01093 return _MBCSGetFallback(&sharedData->table->mbcs, offset);
01094 }
01095 case 16|MBCS_STATE_VALID_16_PAIR:
01096
01097
01098 offset+=(uint16_t)entry>>7;
01099 entry=unicodeCodeUnits[offset++];
01100 if(UTF_IS_FIRST_SURROGATE(entry)) {
01101 return UTF16_GET_PAIR_VALUE(entry, unicodeCodeUnits[offset]);
01102 } else if(entry!=0xfffe) {
01103
01104 return (UChar32)entry;
01105 } else {
01106
01107
01108
01109
01110 return _MBCSGetFallback(&sharedData->table->mbcs, offset-1);
01111 }
01112 default:
01113
01114
01115 break;
01116 }
01117
01118
01119 state=(uint8_t)(entry&0x7f);
01120 offset=0;
01121 }
01122 } while(source<(const uint8_t *)sourceLimit);
01123
01124 *pSource=(const char *)source;
01125 return 0xffff;
01126 }
01127
01128
01129 U_CFUNC UChar32
01130 _MBCSSingleSimpleGetNextUChar(UConverterSharedData *sharedData,
01131 uint8_t b, UBool useFallback) {
01132 int32_t entry;
01133
01134 entry=sharedData->table->mbcs.stateTable[0][b];
01135
01136
01137
01138
01139
01140
01141
01142
01143
01144
01145 switch((uint32_t)entry>>27U) {
01146 case 16|MBCS_STATE_ILLEGAL:
01147
01148 return 0xffff;
01149 case 16|MBCS_STATE_UNASSIGNED:
01150
01151 return 0xfffe;
01152 case 16|MBCS_STATE_FALLBACK_DIRECT_16:
01153
01154
01155 if(!TO_U_USE_FALLBACK(useFallback)) {
01156 return 0xfffe;
01157 }
01158
01159 case 16|MBCS_STATE_VALID_DIRECT_16:
01160
01161
01162
01163 return (UChar)(entry>>7);
01164 case 16|MBCS_STATE_FALLBACK_DIRECT_20:
01165
01166 if(!TO_U_USE_FALLBACK(useFallback)) {
01167 return 0xfffe;
01168 }
01169
01170 case 16|MBCS_STATE_VALID_DIRECT_20:
01171
01172 return 0x10000+((entry>>7)&0xfffff);
01173 default:
01174
01175
01176 return 0xffff;
01177 }
01178 }
01179
01180
01181 /*
 * Convert UTF-16 text to codepage bytes using the from-Unicode lookup tables.
 *
 * Reads UChars from pArgs->source up to pArgs->sourceLimit and writes bytes to
 * pArgs->target up to pArgs->targetLimit. If pArgs->offsets is not NULL, one
 * source index is written per output byte. Tables with outputType
 * MBCS_OUTPUT_1 are handed to _MBCSSingleFromUnicodeWithOffsets().
 *
 * A lead surrogate at the end of the input is kept in cnv->fromUSurrogateLead
 * for the next call. Bytes that do not fit into the target are stored in
 * cnv->charErrorBuffer.
 *
 * *pErrorCode is set to
 *   U_BUFFER_OVERFLOW_ERROR   when the target is full,
 *   U_ILLEGAL_CHAR_FOUND / U_INVALID_CHAR_FOUND before fromUCallback() is
 *                             called for an unpaired surrogate / unassigned
 *                             code point,
 *   U_TRUNCATED_CHAR_FOUND    when pArgs->flush is set and the input ends after
 *                             a lead surrogate.
 * On return pArgs->source, pArgs->target and pArgs->offsets are updated.
 */
01182 U_CFUNC void
01183 _MBCSFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
01184 UErrorCode *pErrorCode) {
01185 UConverter *cnv;
01186 const UChar *source, *sourceLimit;
01187 uint8_t *target;
01188 int32_t targetCapacity;
01189 int32_t *offsets;
01190 /* lookup tables and output format from the shared converter data */
01191 const uint16_t *table;
01192 const uint8_t *bytes;
01193 uint8_t outputType;
01194 /* current code point, or a pending lead surrogate */
01195 UChar32 c;
01196 /* source indexes reported through the offsets array */
01197 int32_t sourceIndex, nextSourceIndex;
01198 /* scratch values for the current code point */
01199 UConverterCallbackReason reason;
01200 uint32_t i;
01201 uint32_t value;
01202 int32_t length;
01203
01204 /* single-byte output is handled by the single-byte implementation */
01205 cnv=pArgs->converter;
01206 outputType=cnv->sharedData->table->mbcs.outputType;
01207 if(outputType==MBCS_OUTPUT_1) {
01208 _MBCSSingleFromUnicodeWithOffsets(pArgs, pErrorCode);
01209 return;
01210 }
01211
01212 /* copy the arguments into local variables */
01213 source=pArgs->source;
01214 sourceLimit=pArgs->sourceLimit;
01215 target=(uint8_t *)pArgs->target;
01216 targetCapacity=pArgs->targetLimit-pArgs->target;
01217 offsets=pArgs->offsets;
01218
01219 table=cnv->sharedData->table->mbcs.fromUnicodeTable;
01220 bytes=cnv->sharedData->table->mbcs.fromUnicodeBytes;
01221
01222 /* a lead surrogate saved by a previous call, or 0 */
01223 c=cnv->fromUSurrogateLead;
01224
01225 /* sourceIndex is -1 while a lead surrogate from a previous call is pending */
01226 sourceIndex= c==0 ? 0 : -1;
01227 nextSourceIndex=0;
01228
01229
01230
01231
01232
01233
01234
01235
01236
01237
01238
01239
01240
01241
01242 /* with a pending lead surrogate and room in the target, jump into the loop to look for its trail */
01243 if(c!=0 && targetCapacity>0) {
01244 goto getTrail;
01245 }
01246
01247 while(source<sourceLimit) {
01248
01249
01250
01251
01252
01253
01254
01255 /* a code point is only read while at least one target byte is free */
01256 if(targetCapacity>0) {
01257
01258
01259
01260
01261
01262 c=*source++;
01263 ++nextSourceIndex;
01264 if(UTF_IS_SURROGATE(c)) {
01265 if(UTF_IS_SURROGATE_FIRST(c)) {
01266 getTrail:
01267 if(source<sourceLimit) {
01268 /* look at the following code unit */
01269 UChar trail=*source;
01270 if(UTF_IS_SECOND_SURROGATE(trail)) {
01271 ++source;
01272 ++nextSourceIndex;
01273 c=UTF16_GET_PAIR_VALUE(c, trail);
01274 /* c is now the code point of the surrogate pair; continue with the lookup below */
01275
01276 } else {
01277 /* a lead surrogate that is not followed by a trail surrogate is reported as illegal */
01278
01279 reason=UCNV_ILLEGAL;
01280 *pErrorCode=U_ILLEGAL_CHAR_FOUND;
01281 goto callback;
01282 }
01283 } else {
01284 /* no more input: leave the loop with the lead surrogate still in c */
01285 break;
01286 }
01287 } else {
01288 /* a surrogate that is not a lead surrogate is reported as illegal */
01289
01290 reason=UCNV_ILLEGAL;
01291 *pErrorCode=U_ILLEGAL_CHAR_FOUND;
01292 goto callback;
01293 }
01294 }
01295
01296
01297
01298
01299
01300
01301
01302
01303
01304
01305
01306
01307
01308
01309
01310
01311
01312
01313
01314
01315
01316
01317
01318
01319
01320
01321
01322
01323
01324
01325
01326
01327
01328
01329
01330
01331 /*
01332 * table lookup: table[c>>10] is combined with bits 9..4 of c to select a
01333 * pair of uint16_t located from index 0x440 on; the first of the pair
01334 * holds one flag bit per value of c&0xf, the second is the index of a
01335 * block of 16 results in bytes[]
01336 */
01337 i=0x440+2*((uint32_t)table[c>>10]+((c>>4)&0x3f));
01338
01339 /* fetch the result if the flag bit for c is set or fallbacks are allowed for c */
01340 if((table[i++]&(1<<(c&0xf)))!=0 || UCNV_FROM_U_USE_FALLBACK(cnv, c)) {
01341 const uint8_t *p=bytes;
01342
01343 /* the size and layout of one result depends on the output type */
01344 switch(outputType) {
01345 case MBCS_OUTPUT_1:
01346 p+=(16*(uint32_t)table[i]+(c&0xf));
01347 value=*p;
01348 length=1;
01349 break;
01350 case MBCS_OUTPUT_2:
01351 p+=(16*(uint32_t)table[i]+(c&0xf))*2;
01352 # if U_IS_BIG_ENDIAN
01353 value=*(uint16_t *)p;
01354 # else
01355 value=((uint32_t)*p<<8)|p[1];
01356 # endif
01357 if(value<=0xff) {
01358 length=1;
01359 } else {
01360 length=2;
01361 }
01362 break;
01363 case MBCS_OUTPUT_3:
01364 p+=(16*(uint32_t)table[i]+(c&0xf))*3;
01365 value=((uint32_t)*p<<16)|((uint32_t)p[1]<<8)|p[2];
01366 if(value<=0xff) {
01367 length=1;
01368 } else if(value<=0xffff) {
01369 length=2;
01370 } else {
01371 length=3;
01372 }
01373 break;
01374 case MBCS_OUTPUT_4:
01375 p+=(16*(uint32_t)table[i]+(c&0xf))*4;
01376 # if U_IS_BIG_ENDIAN
01377 value=*(uint32_t *)p;
01378 # else
01379 value=((uint32_t)*p<<24)|((uint32_t)p[1]<<16)|((uint32_t)p[2]<<8)|p[3];
01380 # endif
01381 if(value<=0xff) {
01382 length=1;
01383 } else if(value<=0xffff) {
01384 length=2;
01385 } else if(value<=0xffffff) {
01386 length=3;
01387 } else {
01388 length=4;
01389 }
01390 break;
01391 case MBCS_OUTPUT_3_EUC:
01392 p+=(16*(uint32_t)table[i]+(c&0xf))*2;
01393 # if U_IS_BIG_ENDIAN
01394 value=*(uint16_t *)p;
01395 # else
01396 value=((uint32_t)*p<<8)|p[1];
01397 # endif
01398 /* two bytes are stored; a cleared top bit in either byte marks a 3-byte result: that bit is set and a lead byte 0x8e or 0x8f is prepended */
01399 if(value<=0xff) {
01400 length=1;
01401 } else if((value&0x8000)==0) {
01402 value|=0x8e8000;
01403 length=3;
01404 } else if((value&0x80)==0) {
01405 value|=0x8f0080;
01406 length=3;
01407 } else {
01408 length=2;
01409 }
01410 break;
01411 case MBCS_OUTPUT_4_EUC:
01412 p+=(16*(uint32_t)table[i]+(c&0xf))*3;
01413 value=((uint32_t)*p<<16)|((uint32_t)p[1]<<8)|p[2];
01414 /* three bytes are stored; a cleared top bit in the first or second byte marks a 4-byte result: that bit is set and a lead byte 0x8e or 0x8f is prepended */
01415 if(value<=0xff) {
01416 length=1;
01417 } else if(value<=0xffff) {
01418 length=2;
01419 } else if((value&0x800000)==0) {
01420 value|=0x8e800000;
01421 length=4;
01422 } else if((value&0x8000)==0) {
01423 value|=0x8f008000;
01424 length=4;
01425 } else {
01426 length=3;
01427 }
01428 break;
01429 default:
01430 /*
01431 * unknown output type: produce an empty result; it is then either
01432 * reported as unassigned by the check below or written as zero bytes
01433 */
01434
01435
01436
01437
01438 value=0;
01439 length=0;
01440 break;
01441 }
01442
01443 /* a zero result is accepted only for c==0 or when the flag bit for c is set */
01444 if(value==0 && c!=0 && (table[i-1]&(1<<(c&0xf)))==0) {
01445
01446
01447
01448
01449
01450
01451
01452 reason=UCNV_UNASSIGNED;
01453 *pErrorCode=U_INVALID_CHAR_FOUND;
01454 goto callback;
01455 }
01456 } else {
01457 /* flag bit not set and no fallback allowed: unassigned */
01458 reason=UCNV_UNASSIGNED;
01459 *pErrorCode=U_INVALID_CHAR_FOUND;
01460 goto callback;
01461 }
01462
01463
01464 /* write the length bytes of value, most significant first */
01465 if(length<=targetCapacity) {
01466 if(offsets==NULL) {
01467 switch(length) {
01468 /* each case falls through to the next one */
01469 case 4:
01470 *target++=(uint8_t)(value>>24);
01471 case 3:
01472 *target++=(uint8_t)(value>>16);
01473 case 2:
01474 *target++=(uint8_t)(value>>8);
01475 case 1:
01476 *target++=(uint8_t)value;
01477 default:
01478 /* length 0: nothing to write */
01479 break;
01480 }
01481 } else {
01482 switch(length) {
01483 /* each case falls through to the next one */
01484 case 4:
01485 *target++=(uint8_t)(value>>24);
01486 *offsets++=sourceIndex;
01487 case 3:
01488 *target++=(uint8_t)(value>>16);
01489 *offsets++=sourceIndex;
01490 case 2:
01491 *target++=(uint8_t)(value>>8);
01492 *offsets++=sourceIndex;
01493 case 1:
01494 *target++=(uint8_t)value;
01495 *offsets++=sourceIndex;
01496 default:
01497 /* length 0: nothing to write */
01498 break;
01499 }
01500 }
01501 targetCapacity-=length;
01502 } else {
01503 uint8_t *p;
01504
01505 /*
01506 * the result does not fit: the first targetCapacity bytes go to the
01507 * target, the remaining ones to cnv->charErrorBuffer
01508 */
01509
01510
01511 /* length becomes the number of bytes that do not fit (1..3 here, since targetCapacity>=1 and length<=4) */
01512 length-=targetCapacity;
01513 p=(uint8_t *)cnv->charErrorBuffer;
01514 switch(length) {
01515 /* each case falls through to the next one */
01516 case 3:
01517 *p++=(uint8_t)(value>>16);
01518 case 2:
01519 *p++=(uint8_t)(value>>8);
01520 case 1:
01521 *p=(uint8_t)value;
01522 default:
01523 /* not reached with the lengths computed above */
01524 break;
01525 }
01526 cnv->charErrorBufferLength=(int8_t)length;
01527
01528 /* drop the bytes that went to the error buffer; the rest goes to the target */
01529 value>>=8*length;
01530 switch(targetCapacity) {
01531 /* each case falls through to the next one */
01532 case 3:
01533 *target++=(uint8_t)(value>>16);
01534 if(offsets!=NULL) {
01535 *offsets++=sourceIndex;
01536 }
01537 case 2:
01538 *target++=(uint8_t)(value>>8);
01539 if(offsets!=NULL) {
01540 *offsets++=sourceIndex;
01541 }
01542 case 1:
01543 *target++=(uint8_t)value;
01544 if(offsets!=NULL) {
01545 *offsets++=sourceIndex;
01546 }
01547 default:
01548 /* not reached: targetCapacity is 1..3 in this branch */
01549 break;
01550 }
01551
01552 /* the target is full; c is cleared because the code point has been handled completely */
01553 targetCapacity=0;
01554 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
01555 c=0;
01556 break;
01557 }
01558
01559 /* normal end of a code point */
01560 c=0;
01561 sourceIndex=nextSourceIndex;
01562 continue;
01563
01564
01565
01566
01567
01568 callback:
01569 /* reached only via goto with reason and *pErrorCode already set */
01570 /* publish the current pointers so that the callback sees them */
01571 pArgs->source=source;
01572 pArgs->target=(char *)target;
01573 pArgs->offsets=offsets;
01574
01575 /* clear the saved lead surrogate before calling out */
01576 cnv->fromUSurrogateLead=0;
01577
01578 /* store c in cnv->invalidUCharBuffer; i receives the number of UChars written (macro not visible here) */
01579 i=0;
01580 UTF_APPEND_CHAR_UNSAFE(cnv->invalidUCharBuffer, i, c);
01581 cnv->invalidUCharLength=(int8_t)i;
01582
01583 /* call the callback function */
01584 fromUCallback(cnv, cnv->fromUContext, pArgs, cnv->invalidUCharBuffer, i, c, reason, pErrorCode);
01585
01586 /* reload the lead surrogate, which the callback may have set through cnv */
01587 c=cnv->fromUSurrogateLead;
01588
01589 /* pass the number of bytes the callback wrote to ucnv_updateCallbackOffsets() (helper not visible here; presumably it fills offsets for them) */
01590 offsets=ucnv_updateCallbackOffsets(offsets, ((uint8_t *)pArgs->target)-target, sourceIndex);
01591 target=(uint8_t *)pArgs->target;
01592
01593 /* account for any change the callback made to pArgs->source, and recompute the free space */
01594 sourceIndex=nextSourceIndex+(pArgs->source-source);
01595 source=pArgs->source;
01596 targetCapacity=(uint8_t *)pArgs->targetLimit-target;
01597
01598 /*
01599 * decide whether to go on: stop on overflow, on pending overflow output,
01600 * or on any other error that is still set
01601 */
01602 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
01603 break;
01604 } else if(cnv->charErrorBufferLength>0) {
01605 /* bytes are waiting in the overflow buffer */
01606 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
01607 break;
01608 } else if(U_FAILURE(*pErrorCode)) {
01609 /* the error was not cleared: drop any pending lead surrogate */
01610 c=0;
01611 break;
01612 }
01613
01614 /* the callback cleared the error: continue with the next code unit */
01615
01616
01617
01618
01619 } else {
01620 /* the target is full */
01621 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
01622 break;
01623 }
01624 }
01625
01626 if(pArgs->flush && source>=sourceLimit) {
01627 /* end of input with flush: a pending lead surrogate is an error, and the state is reset */
01628 if(c!=0 && U_SUCCESS(*pErrorCode)) {
01629
01630 *pErrorCode=U_TRUNCATED_CHAR_FOUND;
01631 }
01632 cnv->fromUSurrogateLead=0;
01633 } else {
01634 /* save a pending lead surrogate (or 0) for the next call */
01635 cnv->fromUSurrogateLead=(UChar)c;
01636 }
01637
01638 /* write back the updated pointers */
01639 pArgs->source=source;
01640 pArgs->target=(char *)target;
01641 pArgs->offsets=offsets;
01642 }
01643
01644 U_CFUNC void
01645 _MBCSFromUnicode(UConverterFromUnicodeArgs *pArgs,
01646 UErrorCode *pErrorCode) {
01647 _MBCSFromUnicodeWithOffsets(pArgs, pErrorCode);
01648 }
01649
01650
01651 U_CFUNC void
01652 _MBCSSingleFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
01653 UErrorCode *pErrorCode) {
01654 UConverter *cnv;
01655 const UChar *source, *sourceLimit;
01656 uint8_t *target;
01657 int32_t targetCapacity;
01658 int32_t *offsets;
01659
01660 const uint16_t *table;
01661 const uint8_t *bytes;
01662 uint8_t outputType;
01663
01664 UChar32 c;
01665
01666 int32_t sourceIndex, nextSourceIndex;
01667
01668 UConverterCallbackReason reason;
01669 uint32_t i;
01670 uint32_t value;
01671
01672
01673 cnv=pArgs->converter;
01674 source=pArgs->source;
01675 sourceLimit=pArgs->sourceLimit;
01676 target=(uint8_t *)pArgs->target;
01677 targetCapacity=pArgs->targetLimit-pArgs->target;
01678 offsets=pArgs->offsets;
01679
01680 table=cnv->sharedData->table->mbcs.fromUnicodeTable;
01681 bytes=cnv->sharedData->table->mbcs.fromUnicodeBytes;
01682 outputType=cnv->sharedData->table->mbcs.outputType;
01683
01684
01685 c=cnv->fromUSurrogateLead;
01686
01687
01688 sourceIndex= c==0 ? 0 : -1;
01689 nextSourceIndex=0;
01690
01691
01692 if(c!=0 && targetCapacity>0) {
01693 goto getTrail;
01694 }
01695
01696 while(source<sourceLimit) {
01697
01698
01699
01700
01701
01702
01703
01704
01705 if(targetCapacity>0) {
01706
01707
01708
01709
01710
01711 c=*source++;
01712 ++nextSourceIndex;
01713 if(UTF_IS_SURROGATE(c)) {
01714 if(UTF_IS_SURROGATE_FIRST(c)) {
01715 getTrail:
01716 if(source<sourceLimit) {
01717
01718 UChar trail=*source;
01719 if(UTF_IS_SECOND_SURROGATE(trail)) {
01720 ++source;
01721 ++nextSourceIndex;
01722 c=UTF16_GET_PAIR_VALUE(c, trail);
01723
01724
01725 } else {
01726
01727
01728 reason=UCNV_ILLEGAL;
01729 *pErrorCode=U_ILLEGAL_CHAR_FOUND;
01730 goto callback;
01731 }
01732 } else {
01733
01734 break;
01735 }
01736 } else {
01737
01738
01739 reason=UCNV_ILLEGAL;
01740 *pErrorCode=U_ILLEGAL_CHAR_FOUND;
01741 goto callback;
01742 }
01743 }
01744
01745
01746 i=0x440+2*((uint32_t)table[c>>10]+((c>>4)&0x3f));
01747
01748
01749 if((table[i++]&(1<<(c&0xf)))!=0 || UCNV_FROM_U_USE_FALLBACK(cnv, c)) {
01750 const uint8_t *p=bytes;
01751
01752
01753 p+=(16*(uint32_t)table[i]+(c&0xf));
01754 value=*p;
01755
01756
01757 if(value==0 && c!=0 && (table[i-1]&(1<<(c&0xf)))==0) {
01758
01759
01760
01761
01762
01763
01764
01765 reason=UCNV_UNASSIGNED;
01766 *pErrorCode=U_INVALID_CHAR_FOUND;
01767 goto callback;
01768 }
01769 } else {
01770
01771 reason=UCNV_UNASSIGNED;
01772 *pErrorCode=U_INVALID_CHAR_FOUND;
01773 goto callback;
01774 }
01775
01776
01777
01778
01779 *target++=(uint8_t)value;
01780 if(offsets!=NULL) {
01781 *offsets++=sourceIndex;
01782 }
01783 --targetCapacity;
01784
01785
01786 c=0;
01787 sourceIndex=nextSourceIndex;
01788 continue;
01789
01790 callback:
01791
01792
01793 pArgs->source=source;
01794 pArgs->target=(char *)target;
01795 pArgs->offsets=offsets;
01796
01797
01798 cnv->fromUSurrogateLead=0;
01799
01800
01801 i=0;
01802 UTF_APPEND_CHAR_UNSAFE(cnv->invalidUCharBuffer, i, c);
01803 cnv->invalidUCharLength=(int8_t)i;
01804
01805
01806 fromUCallback(cnv, cnv->fromUContext, pArgs, cnv->invalidUCharBuffer, i, c, reason, pErrorCode);
01807
01808
01809 c=cnv->fromUSurrogateLead;
01810
01811
01812 offsets=ucnv_updateCallbackOffsets(offsets, ((uint8_t *)pArgs->target)-target, sourceIndex);
01813 target=(uint8_t *)pArgs->target;
01814
01815
01816 sourceIndex=nextSourceIndex+(pArgs->source-source);
01817 source=pArgs->source;
01818 targetCapacity=(uint8_t *)pArgs->targetLimit-target;
01819
01820
01821
01822
01823
01824 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
01825 break;
01826 } else if(cnv->charErrorBufferLength>0) {
01827
01828 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
01829 break;
01830 } else if(U_FAILURE(*pErrorCode)) {
01831
01832 c=0;
01833 break;
01834 }
01835
01836
01837
01838
01839
01840
01841 } else {
01842
01843 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
01844 break;
01845 }
01846 }
01847
01848 if(pArgs->flush && source>=sourceLimit) {
01849
01850 if(c!=0 && U_SUCCESS(*pErrorCode)) {
01851
01852 *pErrorCode=U_TRUNCATED_CHAR_FOUND;
01853 }
01854 cnv->fromUSurrogateLead=0;
01855 } else {
01856
01857 cnv->fromUSurrogateLead=(UChar)c;
01858 }
01859
01860
01861 pArgs->source=source;
01862 pArgs->target=(char *)target;
01863 pArgs->offsets=offsets;
01864 }
01865
01866
01867
01868
01869
01870
01871
01872
01873
01874
01875
01876
01877
01878
01879
01880 U_CFUNC int32_t
01881 _MBCSFromUChar32(UConverterSharedData *sharedData,
01882 UChar32 c, uint32_t *pValue,
01883 UBool useFallback) {
01884 const uint16_t *table=sharedData->table->mbcs.fromUnicodeTable;
01885 uint32_t i;
01886 uint32_t value;
01887 int32_t length;
01888
01889
01890 i=0x440+2*((uint32_t)table[c>>10]+((c>>4)&0x3f));
01891
01892
01893 if((table[i++]&(1<<(c&0xf)))!=0 || FROM_U_USE_FALLBACK(useFallback, c)) {
01894 const uint8_t *p=sharedData->table->mbcs.fromUnicodeBytes;
01895
01896
01897 switch(sharedData->table->mbcs.outputType) {
01898 case MBCS_OUTPUT_1:
01899 p+=(16*(uint32_t)table[i]+(c&0xf));
01900 value=*p;
01901 length=1;
01902 break;
01903 case MBCS_OUTPUT_2:
01904 p+=(16*(uint32_t)table[i]+(c&0xf))*2;
01905 # if U_IS_BIG_ENDIAN
01906 value=*(uint16_t *)p;
01907 # else
01908 value=((uint32_t)*p<<8)|p[1];
01909 # endif
01910 if(value<=0xff) {
01911 length=1;
01912 } else {
01913 length=2;
01914 }
01915 break;
01916 case MBCS_OUTPUT_3:
01917 p+=(16*(uint32_t)table[i]+(c&0xf))*3;
01918 value=((uint32_t)*p<<16)|((uint32_t)p[1]<<8)|p[2];
01919 if(value<=0xff) {
01920 length=1;
01921 } else if(value<=0xffff) {
01922 length=2;
01923 } else {
01924 length=3;
01925 }
01926 break;
01927 case MBCS_OUTPUT_4:
01928 p+=(16*(uint32_t)table[i]+(c&0xf))*4;
01929 # if U_IS_BIG_ENDIAN
01930 value=*(uint32_t *)p;
01931 # else
01932 value=((uint32_t)*p<<24)|((uint32_t)p[1]<<16)|((uint32_t)p[2]<<8)|p[3];
01933 # endif
01934 if(value<=0xff) {
01935 length=1;
01936 } else if(value<=0xffff) {
01937 length=2;
01938 } else if(value<=0xffffff) {
01939 length=3;
01940 } else {
01941 length=4;
01942 }
01943 break;
01944 case MBCS_OUTPUT_3_EUC:
01945 p+=(16*(uint32_t)table[i]+(c&0xf))*2;
01946 # if U_IS_BIG_ENDIAN
01947 value=*(uint16_t *)p;
01948 # else
01949 value=((uint32_t)*p<<8)|p[1];
01950 # endif
01951
01952 if(value<=0xff) {
01953 length=1;
01954 } else if((value&0x8000)==0) {
01955 value|=0x8e8000;
01956 length=3;
01957 } else if((value&0x80)==0) {
01958 value|=0x8f0080;
01959 length=3;
01960 } else {
01961 length=2;
01962 }
01963 break;
01964 case MBCS_OUTPUT_4_EUC:
01965 p+=(16*(uint32_t)table[i]+(c&0xf))*3;
01966 value=((uint32_t)*p<<16)|((uint32_t)p[1]<<8)|p[2];
01967
01968 if(value<=0xff) {
01969 length=1;
01970 } else if(value<=0xffff) {
01971 length=2;
01972 } else if((value&0x800000)==0) {
01973 value|=0x8e800000;
01974 length=4;
01975 } else if((value&0x8000)==0) {
01976 value|=0x8f008000;
01977 length=4;
01978 } else {
01979 length=3;
01980 }
01981 break;
01982 default:
01983
01984 return -1;
01985 }
01986
01987
01988 if(value==0 && c!=0 && (table[i-1]&(1<<(c&0xf)))==0) {
01989
01990
01991
01992
01993
01994
01995 return 0;
01996 } else {
01997 *pValue=value;
01998 return length;
01999 }
02000 } else {
02001 return 0;
02002 }
02003 }
02004
02005
02006
02007 static void
02008 _MBCSGetStarters(const UConverter* cnv,
02009 UBool starters[256],
02010 UErrorCode *pErrorCode) {
02011 const int32_t *state0=cnv->sharedData->table->mbcs.stateTable[0];
02012 int i;
02013
02014 for(i=0; i<256; ++i) {
02015
02016 starters[i]= (UBool)(state0[i]>=0);
02017 }
02018 }
02019
02020
02021
02022
02023
02024 U_CFUNC UBool
02025 _MBCSIsLeadByte(UConverterSharedData *sharedData, char byte) {
02026 return (UBool)(sharedData->table->mbcs.stateTable[0][(uint8_t)byte]>=0);
02027 }
02028
02029 static const UConverterImpl _MBCSImpl={
02030 UCNV_MBCS,
02031
02032 _MBCSLoad,
02033 NULL,
02034
02035 _MBCSOpen,
02036 NULL,
02037 _MBCSReset,
02038
02039 _MBCSToUnicode,
02040 _MBCSToUnicodeWithOffsets,
02041 _MBCSFromUnicode,
02042 _MBCSFromUnicodeWithOffsets,
02043 _MBCSGetNextUChar,
02044
02045 _MBCSGetStarters,
02046 NULL
02047 };
02048
02049
02050
02051
02052
02053
02054 const UConverterSharedData _MBCSData={
02055 sizeof(UConverterSharedData), 1,
02056 NULL, NULL, NULL, FALSE, &_MBCSImpl,
02057 0
02058 };
02059
02060
02061
02062
02063
02064
02065
02066
02067 static void
02068 fromUCallback(UConverter *cnv,
02069 void *context, UConverterFromUnicodeArgs *pArgs,
02070 const UChar *codeUnits, int32_t length, UChar32 codePoint,
02071 UConverterCallbackReason reason, UErrorCode *pErrorCode) {
02072 if(cnv->extraInfo==gb18030Ranges && (reason==UCNV_UNASSIGNED || reason==UCNV_ILLEGAL)) {
02073 const uint32_t *range;
02074 int i;
02075
02076 range=gb18030Ranges[0];
02077 for(i=0; i<sizeof(gb18030Ranges)/sizeof(gb18030Ranges[0]); range+=4, ++i) {
02078 if(range[0]<=(uint32_t)codePoint && (uint32_t)codePoint<=range[1]) {
02079 uint32_t linear;
02080 char bytes[4];
02081
02082
02083 *pErrorCode=U_ZERO_ERROR;
02084
02085
02086 linear=range[2]-LINEAR_18030_BASE;
02087
02088
02089 linear+=((uint32_t)codePoint-range[0]);
02090
02091
02092 bytes[3]=(const char)(0x30+linear%10); linear/=10;
02093 bytes[2]=(const char)(0x81+linear%126); linear/=126;
02094 bytes[1]=(const char)(0x30+linear%10); linear/=10;
02095 bytes[0]=(const char)(0x81+linear);
02096
02097
02098 ucnv_cbFromUWriteBytes(pArgs, bytes, 4, 0, pErrorCode);
02099 return;
02100 }
02101 }
02102 }
02103
02104
02105 cnv->fromUCharErrorBehaviour(context, pArgs, codeUnits, length, codePoint, reason, pErrorCode);
02106 }
02107
02108 static void
02109 toUCallback(UConverter *cnv,
02110 void *context, UConverterToUnicodeArgs *pArgs,
02111 const char *codeUnits, int32_t length,
02112 UConverterCallbackReason reason, UErrorCode *pErrorCode) {
02113 if(cnv->extraInfo==gb18030Ranges && reason==UCNV_UNASSIGNED && length==4) {
02114 const uint32_t *range;
02115 uint32_t linear;
02116 int i;
02117
02118 linear=LINEAR_18030((uint8_t)codeUnits[0], (uint8_t)codeUnits[1], (uint8_t)codeUnits[2], (uint8_t)codeUnits[3]);
02119 range=gb18030Ranges[0];
02120 for(i=0; i<sizeof(gb18030Ranges)/sizeof(gb18030Ranges[0]); range+=4, ++i) {
02121 if(range[2]<=linear && linear<=range[3]) {
02122 UChar u[UTF_MAX_CHAR_LENGTH];
02123
02124
02125 *pErrorCode=U_ZERO_ERROR;
02126
02127
02128 linear=range[0]+(linear-range[2]);
02129
02130
02131 i=0;
02132 UTF_APPEND_CHAR_UNSAFE(u, i, linear);
02133 ucnv_cbToUWriteUChars(pArgs, u, i, 0, pErrorCode);
02134 return;
02135 }
02136 }
02137 }
02138
02139
02140 cnv->fromCharErrorBehaviour(context, pArgs, codeUnits, length, reason, pErrorCode);
02141 }