00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026 #include "unicode/utypes.h"
00027 #include "cmemory.h"
00028 #include "ucmp16.h"
00029 #include "ucmp8.h"
00030 #include "unicode/ucnv_err.h"
00031 #include "ucnv_bld.h"
00032 #include "unicode/ucnv.h"
00033 #include "ucnv_cnv.h"
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057
00058
00059
00060
00061
00062
00063
00064
00065
00066
00067
00068
00069
00070
00071
00072
00073
00074
/* Size, in bytes, of the scratch buffers this file uses to hold one encoded
   LMBCS character. */
#define ULMBCS_CHARSIZE_MAX 3

/* Byte/code-unit values strictly between ULMBCS_C0END and ULMBCS_C1START are
   copied through unchanged by both conversion directions below. */
#define ULMBCS_C0END 0x1F
#define ULMBCS_C1START 0x80

/* One byte of LMBCS data; also used for group-byte values. */
typedef uint8_t ulmbcs_byte_t;

/* Group bytes. Each value is also the index of the group's code page name in
   OptGroupByteToCPName (e.g. 0x01 -> "ibm-850", 0x10 -> "ibm-943").
   NOTE(review): index 0x12 holds "ibm-950" and 0x13 holds "ibm-1386";
   confirm that the CN/TW names are attached to the intended code pages. */
#define ULMBCS_GRP_L1 0x01
#define ULMBCS_GRP_GR 0x02
#define ULMBCS_GRP_HE 0x03
#define ULMBCS_GRP_AR 0x04
#define ULMBCS_GRP_RU 0x05
#define ULMBCS_GRP_L2 0x06
#define ULMBCS_GRP_TR 0x08
#define ULMBCS_GRP_TH 0x0B
#define ULMBCS_GRP_JA 0x10
#define ULMBCS_GRP_KO 0x11
#define ULMBCS_GRP_CN 0x12
#define ULMBCS_GRP_TW 0x13

/* Group bytes at or above this value are treated as double-byte groups by
   the conversion code; lower ones as single-byte groups. */
#define ULMBCS_DOUBLEOPTGROUP_START 0x10

/* C0 values that are copied through unprefixed instead of going through the
   ULMBCS_GRP_CTRL escape. */
#define ULMBCS_HT 0x09
#define ULMBCS_LF 0x0A
#define ULMBCS_CR 0x0D

/* Another C0 value that is copied through unprefixed.
   NOTE(review): the name suggests a Lotus 1-2-3 system byte - confirm. */
#define ULMBCS_123SYSTEMRANGE 0x19

/* Group byte that introduces an escaped C0/C1 control character. */
#define ULMBCS_GRP_CTRL 0x0F

/* Added to a C0 control value when it is written after ULMBCS_GRP_CTRL, and
   subtracted again on decode. Also the element count of
   OptGroupByteToCPName. */
#define ULMBCS_CTRLOFFSET 0x20

/* Index of the "lmb-excp" converter. Its output is emitted without an extra
   group-byte prefix by LMBCSConversionWorker. */
#define ULMBCS_GRP_EXCEPT 0x00

/* Group byte that introduces a raw 16-bit Unicode value (see
   LMBCSConvertUni / GetUniFromLMBCSUni). */
#define ULMBCS_GRP_UNICODE 0x14

/* Placeholder written in place of a zero low byte inside a
   ULMBCS_GRP_UNICODE sequence. */
#define ULMBCS_UNICOMPATZERO 0xF6

/* Number of bytes LMBCSConvertUni always writes. */
#define ULMBCS_UNICODE_SIZE 3

/* NOTE(review): not referenced in the visible part of this file; presumably
   the default optimization group (same value as ULMBCS_GRP_L1) - confirm. */
#define ULMBCS_DEFAULTOPTGROUP 0x1
00204
00205
00206
00207
00208
00209
00210
00211
00212
00213
00214
00215
00216
00217
00218
00219
/* Converter name for each group byte, indexed by the group byte itself.
   NULL means no converter is opened for that value. Slots after the last
   initializer (0x14..0x1F) are implicitly NULL. */
static const char * OptGroupByteToCPName[ULMBCS_CTRLOFFSET] = {
   "lmb-excp",   /* 0x00  ULMBCS_GRP_EXCEPT */
   "ibm-850",    /* 0x01  ULMBCS_GRP_L1 */
   "ibm-851",    /* 0x02  ULMBCS_GRP_GR */
   "ibm-1255",   /* 0x03  ULMBCS_GRP_HE */
   "ibm-1256",   /* 0x04  ULMBCS_GRP_AR */
   "ibm-1251",   /* 0x05  ULMBCS_GRP_RU */
   "ibm-852",    /* 0x06  ULMBCS_GRP_L2 */
   NULL,         /* 0x07 */
   "ibm-1254",   /* 0x08  ULMBCS_GRP_TR */
   NULL,         /* 0x09  (ULMBCS_HT is copied through directly) */
   NULL,         /* 0x0A  (ULMBCS_LF is copied through directly) */
   "ibm-874",    /* 0x0B  ULMBCS_GRP_TH */
   NULL,         /* 0x0C */
   NULL,         /* 0x0D  (ULMBCS_CR is copied through directly) */
   NULL,         /* 0x0E */
   NULL,         /* 0x0F  ULMBCS_GRP_CTRL is handled inline, no converter */
   "ibm-943",    /* 0x10  ULMBCS_GRP_JA */
   "ibm-1363",   /* 0x11  ULMBCS_GRP_KO */
   "ibm-950",    /* 0x12  ULMBCS_GRP_CN */
   "ibm-1386"    /* 0x13  ULMBCS_GRP_TW */
};
00245
00246
00247
00248
00249
/* Highest group byte that has an entry in OptGroupByteToCPName. It is a
   valid index, so arrays indexed by group need ULMBCS_GRP_LAST + 1 slots. */
#define ULMBCS_GRP_LAST 0x13

/* Pseudo-group stored in UniLMBCSGrpMap for ranges that may be found in more
   than one single-byte group (group byte < ULMBCS_DOUBLEOPTGROUP_START). */
#define ULMBCS_AMBIGUOUS_SBCS 0x80

/* Pseudo-group stored in UniLMBCSGrpMap for ranges that may be found in more
   than one double-byte group (group byte >= ULMBCS_DOUBLEOPTGROUP_START). */
#define ULMBCS_AMBIGUOUS_MBCS 0x81

/* Non-zero when the real group 'xgroup' is a candidate for the ambiguous
   class 'agroup': a single-byte group for ULMBCS_AMBIGUOUS_SBCS, or a
   double-byte group for ULMBCS_AMBIGUOUS_MBCS. Always zero when 'agroup' is
   not one of the two ambiguous values. Both arguments are evaluated more
   than once. */
#define ULMBCS_AMBIGUOUS_MATCH(agroup, xgroup) \
 ((((agroup) == ULMBCS_AMBIGUOUS_SBCS) && \
 (xgroup) < ULMBCS_DOUBLEOPTGROUP_START) || \
 (((agroup) == ULMBCS_AMBIGUOUS_MBCS) && \
 (xgroup) >= ULMBCS_DOUBLEOPTGROUP_START))
00282
00283
00284
00285
00286
/* Maps ranges of 16-bit Unicode values to the LMBCS group that should be
   tried for them. GrpType is either a real group byte, ULMBCS_GRP_CTRL,
   ULMBCS_GRP_UNICODE, or one of the ULMBCS_AMBIGUOUS_* pseudo-groups.

   FindLMBCSUniRange scans the rows in order and stops at the first row whose
   uniEndRange is >= the character, so:
     - rows must stay sorted by ascending uniEndRange;
     - values that fall into a gap between rows get ULMBCS_GRP_UNICODE;
     - the final row must end at 0xFFFF, because it is what stops the scan. */
struct _UniLMBCSGrpMap
{
   UChar uniStartRange;      /* first value covered by the row (inclusive) */
   UChar uniEndRange;        /* last value covered by the row (inclusive)  */
   ulmbcs_byte_t GrpType;    /* group or pseudo-group for the range        */
} UniLMBCSGrpMap[]
=
{

   {0x0001, 0x001F, ULMBCS_GRP_CTRL},
   {0x0080, 0x009F, ULMBCS_GRP_CTRL},
   {0x00A0, 0x01CD, ULMBCS_AMBIGUOUS_SBCS},
   {0x01CE, 0x01CE, ULMBCS_GRP_TW },
   {0x01CF, 0x02B9, ULMBCS_AMBIGUOUS_SBCS},
   {0x02BA, 0x02BA, ULMBCS_GRP_CN},
   {0x02BC, 0x02C8, ULMBCS_AMBIGUOUS_SBCS},
   {0x02C9, 0x02D0, ULMBCS_AMBIGUOUS_MBCS},
   {0x02D8, 0x02DD, ULMBCS_AMBIGUOUS_SBCS},
   {0x0384, 0x03CE, ULMBCS_AMBIGUOUS_SBCS},
   {0x0400, 0x044E, ULMBCS_GRP_RU},
   {0x044F, 0x044F, ULMBCS_AMBIGUOUS_MBCS},
   {0x0450, 0x0491, ULMBCS_GRP_RU},
   {0x05B0, 0x05F2, ULMBCS_GRP_HE},
   {0x060C, 0x06AF, ULMBCS_GRP_AR},
   {0x0E01, 0x0E5B, ULMBCS_GRP_TH},
   {0x200C, 0x200F, ULMBCS_AMBIGUOUS_SBCS},
   {0x2010, 0x2010, ULMBCS_AMBIGUOUS_MBCS},
   {0x2013, 0x2015, ULMBCS_AMBIGUOUS_SBCS},
   {0x2016, 0x2016, ULMBCS_AMBIGUOUS_MBCS},
   {0x2017, 0x2024, ULMBCS_AMBIGUOUS_SBCS},
   {0x2025, 0x2025, ULMBCS_AMBIGUOUS_MBCS},
   {0x2026, 0x2026, ULMBCS_AMBIGUOUS_SBCS},
   {0x2027, 0x2027, ULMBCS_GRP_CN},
   {0x2030, 0x2033, ULMBCS_AMBIGUOUS_SBCS},
   {0x2035, 0x2035, ULMBCS_AMBIGUOUS_MBCS},
   {0x2039, 0x203A, ULMBCS_AMBIGUOUS_SBCS},
   {0x203B, 0x203B, ULMBCS_AMBIGUOUS_MBCS},
   {0x2074, 0x2074, ULMBCS_GRP_KO},
   {0x207F, 0x207F, ULMBCS_GRP_EXCEPT},
   {0x2081, 0x2084, ULMBCS_GRP_KO},
   {0x20A4, 0x20AC, ULMBCS_AMBIGUOUS_SBCS},
   {0x2103, 0x2109, ULMBCS_AMBIGUOUS_MBCS},
   {0x2111, 0x2126, ULMBCS_AMBIGUOUS_SBCS},
   {0x212B, 0x212B, ULMBCS_AMBIGUOUS_MBCS},
   {0x2135, 0x2135, ULMBCS_AMBIGUOUS_SBCS},
   {0x2153, 0x2154, ULMBCS_GRP_KO},
   {0x215B, 0x215E, ULMBCS_GRP_EXCEPT},
   {0x2160, 0x2179, ULMBCS_AMBIGUOUS_MBCS},
   {0x2190, 0x2195, ULMBCS_GRP_EXCEPT},
   {0x2196, 0x2199, ULMBCS_AMBIGUOUS_MBCS},
   {0x21A8, 0x21A8, ULMBCS_GRP_EXCEPT},
   {0x21B8, 0x21B9, ULMBCS_GRP_CN},
   {0x21D0, 0x21D5, ULMBCS_GRP_EXCEPT},
   {0x21E7, 0x21E7, ULMBCS_GRP_CN},
   {0x2200, 0x220B, ULMBCS_GRP_EXCEPT},
   {0x220F, 0x2215, ULMBCS_AMBIGUOUS_MBCS},
   {0x2219, 0x2220, ULMBCS_GRP_EXCEPT},
   {0x2223, 0x2228, ULMBCS_AMBIGUOUS_MBCS},
   {0x2229, 0x222B, ULMBCS_GRP_EXCEPT},
   {0x222C, 0x223D, ULMBCS_AMBIGUOUS_MBCS},
   {0x2245, 0x2248, ULMBCS_GRP_EXCEPT},
   {0x224C, 0x224C, ULMBCS_GRP_TW},
   {0x2252, 0x2252, ULMBCS_AMBIGUOUS_MBCS},
   {0x2260, 0x2265, ULMBCS_GRP_EXCEPT},
   {0x2266, 0x226F, ULMBCS_AMBIGUOUS_MBCS},
   {0x2282, 0x2297, ULMBCS_GRP_EXCEPT},
   {0x2299, 0x22BF, ULMBCS_AMBIGUOUS_MBCS},
   {0x22C0, 0x22C0, ULMBCS_GRP_EXCEPT},
   {0x2310, 0x2310, ULMBCS_GRP_EXCEPT},
   {0x2312, 0x2312, ULMBCS_AMBIGUOUS_MBCS},
   {0x2318, 0x2321, ULMBCS_GRP_EXCEPT},
   /* NOTE(review): same range as the row above; the linear search always
      stops at the first of the two, so this row is never selected.
      Confirm which group was intended. */
   {0x2318, 0x2321, ULMBCS_GRP_CN},
   {0x2460, 0x24E9, ULMBCS_AMBIGUOUS_MBCS},
   {0x2500, 0x2500, ULMBCS_AMBIGUOUS_SBCS},
   {0x2501, 0x2501, ULMBCS_AMBIGUOUS_MBCS},
   {0x2502, 0x2502, ULMBCS_AMBIGUOUS_SBCS},
   {0x2503, 0x2503, ULMBCS_AMBIGUOUS_MBCS},
   {0x2504, 0x2505, ULMBCS_GRP_TW},
   {0x2506, 0x2665, ULMBCS_AMBIGUOUS_MBCS},
   {0x2666, 0x2666, ULMBCS_GRP_EXCEPT},
   {0x2667, 0xFFFE, ULMBCS_AMBIGUOUS_MBCS},
   /* Terminator: guarantees the scan in FindLMBCSUniRange stops. */
   {0xFFFF, 0xFFFF, ULMBCS_GRP_UNICODE}
};
00370
00371 ulmbcs_byte_t
00372 FindLMBCSUniRange(UChar uniChar)
00373 {
00374 struct _UniLMBCSGrpMap * pTable = UniLMBCSGrpMap;
00375
00376 while (uniChar > pTable->uniEndRange)
00377 {
00378 pTable++;
00379 }
00380
00381 if (uniChar >= pTable->uniStartRange)
00382 {
00383 return pTable->GrpType;
00384 }
00385 return ULMBCS_GRP_UNICODE;
00386 }
00387
00388
00389
00390
00391
00392
00393
00394
00395
00396
00397
00398
00399
00400
00401
00402
00403
00404
00405
00406
00407
00408
00409
00410 struct _LocaleLMBCSGrpMap
00411 {
00412 const char *LocaleID;
00413 ulmbcs_byte_t OptGroup;
00414 } LocaleLMBCSGrpMap[] =
00415 {
00416 "ar", ULMBCS_GRP_AR,
00417 "be", ULMBCS_GRP_RU,
00418 "bg", ULMBCS_GRP_L2,
00419
00420 "cs", ULMBCS_GRP_L2,
00421
00422
00423 "el", ULMBCS_GRP_GR,
00424
00425
00426
00427
00428
00429 "he", ULMBCS_GRP_HE,
00430 "hu", ULMBCS_GRP_L2,
00431
00432
00433 "iw", ULMBCS_GRP_HE,
00434 "ja", ULMBCS_GRP_JA,
00435 "ko", ULMBCS_GRP_KO,
00436
00437
00438 "mk", ULMBCS_GRP_RU,
00439
00440
00441 "pl", ULMBCS_GRP_L2,
00442
00443 "ro", ULMBCS_GRP_L2,
00444 "ru", ULMBCS_GRP_RU,
00445 "sh", ULMBCS_GRP_L2,
00446 "sk", ULMBCS_GRP_L2,
00447 "sl", ULMBCS_GRP_L2,
00448 "sq", ULMBCS_GRP_L2,
00449 "sr", ULMBCS_GRP_RU,
00450
00451 "th", ULMBCS_GRP_TH,
00452 "tr", ULMBCS_GRP_TR,
00453 "uk", ULMBCS_GRP_RU,
00454
00455 "zh_TW", ULMBCS_GRP_TW,
00456 "zh", ULMBCS_GRP_CN,
00457 NULL, ULMBCS_GRP_L1
00458 };
00459
00460
00461 ulmbcs_byte_t
00462 FindLMBCSLocale(const char *LocaleID)
00463 {
00464 struct _LocaleLMBCSGrpMap *pTable = LocaleLMBCSGrpMap;
00465
00466 if ((!LocaleID) || (!*LocaleID))
00467 {
00468 return 0;
00469 }
00470
00471 while (pTable->LocaleID)
00472 {
00473 if (*pTable->LocaleID == *LocaleID)
00474 {
00475
00476 if (strncmp(pTable->LocaleID, LocaleID, strlen(pTable->LocaleID)) == 0)
00477 return pTable->OptGroup;
00478 }
00479 else
00480 if (*pTable->LocaleID > *LocaleID)
00481 break;
00482 pTable++;
00483 }
00484 return ULMBCS_GRP_L1;
00485 }
00486
00487
00488
00489
00490
00491
00492
00493
00494
00495
00496
00497
00498
00499
00500
00501
00502
/* DECLARE_LMBCS_DATA(n) defines the three objects for the converter whose
   optimization group is n:
     _LMBCSImpl##n        a UConverterImpl wired to _LMBCSOpen##n,
                          _LMBCSClose and the conversion functions below;
     _LMBCSStaticData##n  a UConverterStaticData named "LMBCS-n";
     _LMBCSData##n        a UConverterSharedData pointing at the two above.
   The initializers are positional, so their order must match the struct
   declarations (which are not in this file). _LMBCSOpen##n must already be
   defined by DEFINE_LMBCS_OPEN(n) where this macro is expanded. */
#define DECLARE_LMBCS_DATA(n) \
 static const UConverterImpl _LMBCSImpl##n={\
 UCNV_LMBCS_##n,\
 NULL,NULL,\
 _LMBCSOpen##n,\
 _LMBCSClose,\
 NULL,\
 _LMBCSToUnicodeWithOffsets,\
 _LMBCSToUnicodeWithOffsets,\
 _LMBCSFromUnicode,\
 _LMBCSFromUnicode,\
 _LMBCSGetNextUChar,\
 NULL\
 };\
 const UConverterStaticData _LMBCSStaticData##n={\
 sizeof(UConverterStaticData),\
 "LMBCS-" #n,\
 0, UCNV_IBM, UCNV_LMBCS_##n, 1, 1,\
 { 0x3f, 0, 0, 0 },1,FALSE,FALSE,{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0} \
 };\
 const UConverterSharedData _LMBCSData##n={\
 sizeof(UConverterSharedData), ~((uint32_t) 0),\
 NULL, NULL, &_LMBCSStaticData##n, FALSE, &_LMBCSImpl##n, \
 0 \
 };

/* DEFINE_LMBCS_OPEN(n) defines _LMBCSOpen##n, an open function that forwards
   to _LMBCSOpenWorker with n as the optimization group. */
#define DEFINE_LMBCS_OPEN(n) \
static void \
 _LMBCSOpen##n(UConverter* _this,const char* name,const char* locale,uint32_t options,UErrorCode* err) \
{ _LMBCSOpenWorker(_this, name,locale,options, err, n);}

/* Forward declarations of the conversion entry points referenced by
   DECLARE_LMBCS_DATA; their definitions appear later in this file. */

/* LMBCS bytes -> UTF-16 code units, optionally recording source offsets. */
void
_LMBCSToUnicodeWithOffsets(UConverterToUnicodeArgs *args,
 UErrorCode* err);

/* UTF-16 code units -> LMBCS bytes. */
void
_LMBCSFromUnicode(UConverterFromUnicodeArgs *args,
 UErrorCode* err);

/* Decode a single character from LMBCS input. */
UChar32
_LMBCSGetNextUChar(UConverterToUnicodeArgs *args,
 UErrorCode* err);
00555
00556
00557
00558 static void
00559 _LMBCSOpenWorker(UConverter* _this,
00560 const char* name,
00561 const char* locale,
00562 uint32_t options,
00563 UErrorCode* err,
00564 ulmbcs_byte_t OptGroup
00565 )
00566 {
00567 UConverterDataLMBCS * extraInfo = (UConverterDataLMBCS*)uprv_malloc (sizeof (UConverterDataLMBCS));
00568 if(extraInfo != NULL)
00569 {
00570 ulmbcs_byte_t i;
00571 ulmbcs_byte_t imax;
00572 imax = sizeof(extraInfo->OptGrpConverter)/sizeof(extraInfo->OptGrpConverter[0]);
00573
00574 for (i=0; i < imax; i++)
00575 {
00576 extraInfo->OptGrpConverter[i] =
00577 (OptGroupByteToCPName[i] != NULL) ?
00578 ucnv_open(OptGroupByteToCPName[i], err) : NULL;
00579 }
00580 extraInfo->OptGroup = OptGroup;
00581 extraInfo->localeConverterIndex = FindLMBCSLocale(locale);
00582 }
00583 else
00584 {
00585 *err = U_MEMORY_ALLOCATION_ERROR;
00586 }
00587 _this->extraInfo = extraInfo;
00588 }
00589
00590 static void
00591 _LMBCSClose(UConverter * _this)
00592 {
00593 if (_this->extraInfo != NULL)
00594 {
00595 ulmbcs_byte_t Ix;
00596 UConverterDataLMBCS * extraInfo = (UConverterDataLMBCS *) _this->extraInfo;
00597
00598 for (Ix=0; Ix < ULMBCS_GRP_UNICODE; Ix++)
00599 {
00600 if (extraInfo->OptGrpConverter[Ix] != NULL)
00601 ucnv_close (extraInfo->OptGrpConverter[Ix]);
00602 }
00603 uprv_free (_this->extraInfo);
00604 }
00605 }
00606
00607
/* One open function per supported optimization group. The argument is the
   group byte in decimal (8 = 0x08, 11 = 0x0B, 16..19 = 0x10..0x13). These
   must come before the DECLARE_LMBCS_DATA expansions, which reference the
   generated _LMBCSOpen<n> functions. */
DEFINE_LMBCS_OPEN(1)
DEFINE_LMBCS_OPEN(2)
DEFINE_LMBCS_OPEN(3)
DEFINE_LMBCS_OPEN(4)
DEFINE_LMBCS_OPEN(5)
DEFINE_LMBCS_OPEN(6)
DEFINE_LMBCS_OPEN(8)
DEFINE_LMBCS_OPEN(11)
DEFINE_LMBCS_OPEN(16)
DEFINE_LMBCS_OPEN(17)
DEFINE_LMBCS_OPEN(18)
DEFINE_LMBCS_OPEN(19)


/* Implementation, static-data and shared-data objects for the same set of
   optimization groups. */
DECLARE_LMBCS_DATA(1)
DECLARE_LMBCS_DATA(2)
DECLARE_LMBCS_DATA(3)
DECLARE_LMBCS_DATA(4)
DECLARE_LMBCS_DATA(5)
DECLARE_LMBCS_DATA(6)
DECLARE_LMBCS_DATA(8)
DECLARE_LMBCS_DATA(11)
DECLARE_LMBCS_DATA(16)
DECLARE_LMBCS_DATA(17)
DECLARE_LMBCS_DATA(18)
DECLARE_LMBCS_DATA(19)
00634
00635
00636
00637
00638
00639
/* Debug-only assertion. When LMBCS_DEBUG is non-zero a failed condition
   forces a crash by writing through a null pointer; otherwise the macro
   does nothing and does not evaluate its argument.
   Both forms expand to a single statement that needs a trailing ';', so
   "if (x) MyAssert(y); else ..." parses as written. */
#if LMBCS_DEBUG
#define MyAssert(b) do { if (!(b)) { *(char *)0 = 1; } } while (0)
#else
#define MyAssert(b) ((void)0)
#endif
00645
00646
00647
00648
00649
00650
00651
00652
00653 size_t
00654 LMBCSConversionWorker (
00655 UConverterDataLMBCS * extraInfo,
00656 ulmbcs_byte_t group,
00657 ulmbcs_byte_t * pStartLMBCS,
00658 UChar * pUniChar,
00659 ulmbcs_byte_t * lastConverterIndex,
00660 UBool * groups_tried
00661 )
00662 {
00663 ulmbcs_byte_t * pLMBCS = pStartLMBCS;
00664 UConverter * xcnv = extraInfo->OptGrpConverter[group];
00665
00666 ulmbcs_byte_t mbChar [ULMBCS_CHARSIZE_MAX];
00667 ulmbcs_byte_t * pmbChar = mbChar;
00668 UBool isDoubleByteGroup = (UBool)((group >= ULMBCS_DOUBLEOPTGROUP_START) ? TRUE : FALSE);
00669 UErrorCode localErr = U_ZERO_ERROR;
00670 int bytesConverted =0;
00671
00672 MyAssert(xcnv);
00673 MyAssert(group<ULMBCS_GRP_UNICODE);
00674
00675 ucnv_fromUnicode(
00676 xcnv,
00677 (char **)&pmbChar,(char *)mbChar+sizeof(mbChar),
00678 (const UChar **)&pUniChar,pUniChar+1,
00679 NULL,TRUE,&localErr);
00680 bytesConverted = pmbChar - mbChar;
00681 pmbChar = mbChar;
00682
00683
00684
00685 if (*mbChar == xcnv->subChar[0] || U_FAILURE(localErr) || !bytesConverted )
00686 {
00687 groups_tried[group] = TRUE;
00688 return 0;
00689 }
00690 *lastConverterIndex = group;
00691
00692
00693
00694
00695 MyAssert((*pmbChar <= ULMBCS_C0END) || (*pmbChar >= ULMBCS_C1START) || (group == ULMBCS_GRP_EXCEPT));
00696
00697
00698 if (group != ULMBCS_GRP_EXCEPT && extraInfo->OptGroup != group)
00699 {
00700 *pLMBCS++ = group;
00701 if (bytesConverted == 1 && isDoubleByteGroup)
00702 {
00703 *pLMBCS++ = group;
00704 }
00705 }
00706
00707 do
00708 {
00709 *pLMBCS++ = *pmbChar++;
00710 }
00711 while(--bytesConverted);
00712
00713 return (pLMBCS - pStartLMBCS);
00714 }
00715
00716
00717
00718
00719
00720 size_t
00721 LMBCSConvertUni(ulmbcs_byte_t * pLMBCS, UChar uniChar)
00722 {
00723
00724 uint8_t LowCh = (uint8_t)(uniChar & 0x00FF);
00725 uint8_t HighCh = (uint8_t)(uniChar >> 8);
00726
00727 *pLMBCS++ = ULMBCS_GRP_UNICODE;
00728
00729 if (LowCh == 0)
00730 {
00731 *pLMBCS++ = ULMBCS_UNICOMPATZERO;
00732 *pLMBCS++ = HighCh;
00733 }
00734 else
00735 {
00736 *pLMBCS++ = HighCh;
00737 *pLMBCS++ = LowCh;
00738 }
00739 return ULMBCS_UNICODE_SIZE;
00740 }
00741
00742
00743
00744
00745 void
00746 _LMBCSFromUnicode(UConverterFromUnicodeArgs* args,
00747 UErrorCode* err)
00748 {
00749 ulmbcs_byte_t lastConverterIndex = 0;
00750 UChar uniChar;
00751 ulmbcs_byte_t LMBCS[ULMBCS_CHARSIZE_MAX];
00752 ulmbcs_byte_t * pLMBCS;
00753 int bytes_written;
00754 UBool groups_tried[ULMBCS_GRP_LAST];
00755 UConverterDataLMBCS * extraInfo = (UConverterDataLMBCS *) args->converter->extraInfo;
00756 int sourceIndex = 0;
00757
00758
00759
00760
00761
00762
00763
00764
00765
00766
00767
00768
00769
00770
00771
00772
00773
00774
00775
00776
00777
00778
00779
00780
00781
00782 while (args->source < args->sourceLimit && !U_FAILURE(*err))
00783 {
00784 if (args->target >= args->targetLimit)
00785 {
00786 *err = U_BUFFER_OVERFLOW_ERROR;
00787 break;
00788 }
00789 uniChar = *(args->source);
00790 bytes_written = 0;
00791 pLMBCS = LMBCS;
00792
00793
00794
00795
00796
00797 if (((uniChar > ULMBCS_C0END) && (uniChar < ULMBCS_C1START)) ||
00798 uniChar == 0 || uniChar == ULMBCS_HT || uniChar == ULMBCS_CR ||
00799 uniChar == ULMBCS_LF || uniChar == ULMBCS_123SYSTEMRANGE
00800 )
00801 {
00802 *pLMBCS++ = (ulmbcs_byte_t ) uniChar;
00803 bytes_written = 1;
00804 }
00805
00806
00807 if (!bytes_written)
00808 {
00809
00810 ulmbcs_byte_t group = FindLMBCSUniRange(uniChar);
00811
00812 if (group == ULMBCS_GRP_UNICODE)
00813 {
00814 pLMBCS += LMBCSConvertUni(pLMBCS,uniChar);
00815
00816 bytes_written = pLMBCS - LMBCS;
00817 }
00818 else if (group == ULMBCS_GRP_CTRL)
00819 {
00820
00821 if (uniChar <= ULMBCS_C0END)
00822 {
00823 *pLMBCS++ = ULMBCS_GRP_CTRL;
00824 *pLMBCS++ = (ulmbcs_byte_t)(ULMBCS_CTRLOFFSET + uniChar);
00825 }
00826 else if (uniChar >= ULMBCS_C1START && uniChar <= ULMBCS_C1START + ULMBCS_CTRLOFFSET)
00827 {
00828 *pLMBCS++ = ULMBCS_GRP_CTRL;
00829 *pLMBCS++ = (ulmbcs_byte_t ) (uniChar & 0x00FF);
00830 }
00831 bytes_written = pLMBCS - LMBCS;
00832 }
00833 else if (group < ULMBCS_GRP_UNICODE)
00834 {
00835
00836 bytes_written = LMBCSConversionWorker (
00837 extraInfo, group, pLMBCS, &uniChar,
00838 &lastConverterIndex, groups_tried);
00839 }
00840 if (!bytes_written)
00841 {
00842 memset(groups_tried, 0, sizeof(groups_tried));
00843
00844
00845 if (extraInfo->OptGroup != 1
00846 && ULMBCS_AMBIGUOUS_MATCH(group, extraInfo->OptGroup))
00847 {
00848 bytes_written = LMBCSConversionWorker (extraInfo,
00849 extraInfo->OptGroup, pLMBCS, &uniChar,
00850 &lastConverterIndex, groups_tried);
00851 }
00852
00853 if (!bytes_written
00854 && (extraInfo->localeConverterIndex)
00855 && (ULMBCS_AMBIGUOUS_MATCH(group, extraInfo->localeConverterIndex)))
00856 {
00857 bytes_written = LMBCSConversionWorker (extraInfo,
00858 extraInfo->localeConverterIndex, pLMBCS, &uniChar,
00859 &lastConverterIndex, groups_tried);
00860 }
00861
00862 if (!bytes_written
00863 && (lastConverterIndex)
00864 && (ULMBCS_AMBIGUOUS_MATCH(group, lastConverterIndex)))
00865 {
00866 bytes_written = LMBCSConversionWorker (extraInfo,
00867 lastConverterIndex, pLMBCS, &uniChar,
00868 &lastConverterIndex, groups_tried);
00869
00870 }
00871 if (!bytes_written)
00872 {
00873
00874 ulmbcs_byte_t grp_start;
00875 ulmbcs_byte_t grp_end;
00876 ulmbcs_byte_t grp_ix;
00877 grp_start = (ulmbcs_byte_t)((group == ULMBCS_AMBIGUOUS_MBCS)
00878 ? ULMBCS_DOUBLEOPTGROUP_START
00879 : ULMBCS_GRP_L1);
00880 grp_end = (ulmbcs_byte_t)((group == ULMBCS_AMBIGUOUS_MBCS)
00881 ? ULMBCS_GRP_LAST
00882 : ULMBCS_GRP_TH);
00883 for (grp_ix = grp_start;
00884 grp_ix <= grp_end && !bytes_written;
00885 grp_ix++)
00886 {
00887 if (extraInfo->OptGrpConverter [grp_ix] && !groups_tried [grp_ix])
00888 {
00889 bytes_written = LMBCSConversionWorker (extraInfo,
00890 grp_ix, pLMBCS, &uniChar,
00891 &lastConverterIndex, groups_tried);
00892 }
00893 }
00894
00895
00896 if (!bytes_written && grp_start == ULMBCS_GRP_L1)
00897 {
00898 bytes_written = LMBCSConversionWorker (extraInfo,
00899 ULMBCS_GRP_EXCEPT, pLMBCS, &uniChar,
00900 &lastConverterIndex, groups_tried);
00901 }
00902 }
00903
00904 if (!bytes_written)
00905 {
00906
00907 pLMBCS += LMBCSConvertUni(pLMBCS, uniChar);
00908 bytes_written = pLMBCS - LMBCS;
00909 }
00910 }
00911 }
00912
00913
00914 args->source++;
00915 pLMBCS = LMBCS;
00916 while (args->target < args->targetLimit && bytes_written--)
00917 {
00918 *(args->target)++ = *pLMBCS++;
00919 if (args->offsets)
00920 {
00921 *(args->offsets)++ = sourceIndex;
00922 }
00923 }
00924 sourceIndex++;
00925 if (bytes_written > 0)
00926 {
00927
00928
00929
00930
00931 uint8_t * pErrorBuffer = args->converter->charErrorBuffer;
00932 *err = U_BUFFER_OVERFLOW_ERROR;
00933 args->converter->charErrorBufferLength = (int8_t)bytes_written;
00934 while (bytes_written--)
00935 {
00936 *pErrorBuffer++ = *pLMBCS++;
00937 }
00938 }
00939 }
00940 }
00941
00942
00943
00944
00945
00946
00947 UChar
00948 GetUniFromLMBCSUni(char const ** ppLMBCSin)
00949 {
00950 uint8_t HighCh = *(*ppLMBCSin)++;
00951 uint8_t LowCh = *(*ppLMBCSin)++;
00952
00953 if (HighCh == ULMBCS_UNICOMPATZERO )
00954 {
00955 HighCh = LowCh;
00956 LowCh = 0;
00957 }
00958 return (UChar)((HighCh << 8) | LowCh);
00959 }
00960
00961
00962
00963
00964
00965
00966
/* Bail out of the enclosing function when fewer than 'index' bytes remain
   between args->source and args->sourceLimit: sets *err to
   U_TRUNCATED_CHAR_FOUND, rewinds args->source to saveSource and returns
   0xffff. Expects 'args', 'err' and 'saveSource' to be in scope.
   The remaining length is computed by subtraction so no pointer beyond
   sourceLimit is ever formed, and the body is wrapped in do/while(0) so the
   macro behaves as one statement (safe before 'else'). */
#define CHECK_SOURCE_LIMIT(index) \
   do { \
      if ((args->sourceLimit - args->source) < (index)) { \
         *err = U_TRUNCATED_CHAR_FOUND; \
         args->source = saveSource; \
         return 0xffff; \
      } \
   } while (0)
00972
00973
00974
00975
00976
00977
00978
00979
00980
00981 UChar32
00982 _LMBCSGetNextUCharWorker(UConverterToUnicodeArgs* args,
00983 UErrorCode* err,
00984 UBool returnUTF32)
00985 {
00986 ulmbcs_byte_t CurByte;
00987 UChar32 uniChar;
00988 const char * saveSource;
00989
00990
00991 if (args->source >= args->sourceLimit)
00992 {
00993 *err = U_ILLEGAL_ARGUMENT_ERROR;
00994 return 0xffff;
00995 }
00996
00997 CurByte = *((ulmbcs_byte_t *) (saveSource = args->source++));
00998
00999
01000
01001
01002
01003
01004
01005
01006
01007
01008
01009
01010
01011 if(((CurByte > ULMBCS_C0END) && (CurByte < ULMBCS_C1START))
01012 || (CurByte == 0)
01013 || CurByte == ULMBCS_HT || CurByte == ULMBCS_CR
01014 || CurByte == ULMBCS_LF || CurByte == ULMBCS_123SYSTEMRANGE)
01015 {
01016 uniChar = CurByte;
01017 }
01018 else
01019 {
01020 UConverterDataLMBCS * extraInfo;
01021 ulmbcs_byte_t group;
01022 UConverter* cnv;
01023
01024 if (CurByte == ULMBCS_GRP_CTRL)
01025 {
01026 ulmbcs_byte_t C0C1byte;
01027 CHECK_SOURCE_LIMIT(1);
01028 C0C1byte = *(args->source)++;
01029 uniChar = (C0C1byte < ULMBCS_C1START) ? C0C1byte - ULMBCS_CTRLOFFSET : C0C1byte;
01030 }
01031 else
01032 if (CurByte == ULMBCS_GRP_UNICODE)
01033 {
01034 UChar second;
01035 CHECK_SOURCE_LIMIT(2);
01036
01037 uniChar = GetUniFromLMBCSUni(&(args->source));
01038
01039
01040
01041
01042 if(returnUTF32 && UTF_IS_FIRST_SURROGATE(uniChar) && (args->source+3 <= args->sourceLimit)
01043 && *(args->source)++ == ULMBCS_GRP_UNICODE
01044 && UTF_IS_SECOND_SURROGATE(second = GetUniFromLMBCSUni(&(args->source))))
01045 {
01046 uniChar = UTF16_GET_PAIR_VALUE(uniChar, second);
01047 }
01048 }
01049 else if (CurByte <= ULMBCS_CTRLOFFSET)
01050 {
01051 group = CurByte;
01052 extraInfo = (UConverterDataLMBCS *) args->converter->extraInfo;
01053 cnv = extraInfo->OptGrpConverter[group];
01054
01055 if (!cnv)
01056 {
01057
01058 *err = U_INVALID_CHAR_FOUND;
01059 }
01060
01061 else if (group >= ULMBCS_DOUBLEOPTGROUP_START)
01062 {
01063
01064 CHECK_SOURCE_LIMIT(2);
01065
01066
01067 if (*args->source == group) {
01068
01069 ++args->source;
01070 uniChar = _MBCSSimpleGetNextUChar(cnv->sharedData, &args->source, args->source + 1, FALSE);
01071 } else {
01072
01073 const char *newLimit = args->source + 2;
01074 uniChar = _MBCSSimpleGetNextUChar(cnv->sharedData, &args->source, newLimit, FALSE);
01075 args->source = newLimit;
01076 }
01077 }
01078 else {
01079 CHECK_SOURCE_LIMIT(1);
01080 CurByte = *(args->source)++;
01081
01082 if (CurByte >= ULMBCS_C1START)
01083 {
01084 uniChar = cnv->sharedData->table->sbcs.toUnicode[CurByte];
01085 }
01086 else
01087 {
01088
01089
01090
01091 const char *s;
01092 char bytes[2];
01093
01094 extraInfo = (UConverterDataLMBCS *) args->converter->extraInfo;
01095 cnv = extraInfo->OptGrpConverter [ULMBCS_GRP_EXCEPT];
01096
01097
01098 bytes[0] = group;
01099 bytes[1] = CurByte;
01100 s = bytes;
01101 uniChar = _MBCSSimpleGetNextUChar(cnv->sharedData, &s, bytes + 2, FALSE);
01102 }
01103 }
01104 }
01105 else if (CurByte >= ULMBCS_C1START)
01106 {
01107 extraInfo = (UConverterDataLMBCS *) args->converter->extraInfo;
01108 group = extraInfo->OptGroup;
01109 cnv = extraInfo->OptGrpConverter[group];
01110 if (group >= ULMBCS_DOUBLEOPTGROUP_START)
01111 {
01112 if (!_MBCSIsLeadByte(cnv->sharedData, CurByte))
01113 {
01114 CHECK_SOURCE_LIMIT(0);
01115
01116
01117 --args->source;
01118 uniChar = _MBCSSimpleGetNextUChar(cnv->sharedData, &args->source, args->source + 1, FALSE);
01119 }
01120 else
01121 {
01122 CHECK_SOURCE_LIMIT(1);
01123
01124
01125 --args->source;
01126
01127
01128 uniChar = _MBCSSimpleGetNextUChar(cnv->sharedData, &args->source, args->source + 2, FALSE);
01129 }
01130 }
01131 else
01132 {
01133 uniChar = cnv->sharedData->table->sbcs.toUnicode[CurByte];
01134 }
01135 }
01136 }
01137 if (((uint32_t)uniChar - 0xfffe) <= 1)
01138 {
01139
01140
01141
01142
01143 UConverterToUnicodeArgs cbArgs = *args;
01144 UChar * pUniChar = (UChar *)&uniChar;
01145 UConverterCallbackReason reason;
01146
01147 if (uniChar == 0xfffe)
01148 {
01149 reason = UCNV_UNASSIGNED;
01150 *err = U_INVALID_CHAR_FOUND;
01151 }
01152 else
01153 {
01154 reason = UCNV_ILLEGAL;
01155 *err = U_ILLEGAL_CHAR_FOUND;
01156 }
01157
01158 cbArgs.target = pUniChar;
01159 cbArgs.targetLimit = pUniChar + 1;
01160 cbArgs.converter->fromCharErrorBehaviour(cbArgs.converter->toUContext,
01161 &cbArgs,
01162 saveSource,
01163 args->sourceLimit - saveSource,
01164 reason,
01165 err);
01166 }
01167 return uniChar;
01168 }
01169
01170
01171
01172
01173 UChar32
01174 _LMBCSGetNextUChar(UConverterToUnicodeArgs* args,
01175 UErrorCode* err)
01176 {
01177 return _LMBCSGetNextUCharWorker(args, err, TRUE);
01178 }
01179
01180
01181
01182
01183 void
01184 _LMBCSToUnicodeWithOffsets(UConverterToUnicodeArgs* args,
01185 UErrorCode* err)
01186 {
01187 UChar uniChar;
01188 const char * saveSource;
01189 const char * pStartLMBCS = args->source;
01190
01191 if (args->targetLimit == args->target)
01192 {
01193 *err = U_BUFFER_OVERFLOW_ERROR;
01194 return;
01195 }
01196
01197
01198 while (!*err && args->sourceLimit > args->source && args->targetLimit > args->target)
01199 {
01200 saveSource = args->source;
01201
01202 if (args->converter->invalidCharLength)
01203 {
01204 char LMBCS [ULMBCS_CHARSIZE_MAX];
01205 char *pLMBCS = LMBCS, *saveSource, *saveSourceLimit;
01206 size_t size_old = args->converter->invalidCharLength;
01207
01208
01209 size_t size_new_maybe_1 = sizeof(LMBCS) - size_old;
01210 size_t size_new_maybe_2 = args->sourceLimit - args->source;
01211 size_t size_new = (size_new_maybe_1 < size_new_maybe_2) ? size_new_maybe_1 : size_new_maybe_2;
01212
01213
01214 uprv_memcpy(LMBCS, args->converter->invalidCharBuffer, size_old);
01215 uprv_memcpy(LMBCS + size_old, args->source, size_new);
01216 saveSource = (char*)args->source;
01217 saveSourceLimit = (char*)args->sourceLimit;
01218 args->source = pLMBCS;
01219 args->sourceLimit = pLMBCS+size_old+size_new;
01220 uniChar = (UChar) _LMBCSGetNextUCharWorker(args, err, FALSE);
01221 pLMBCS = (char*)args->source;
01222 args->source =saveSource;
01223 args->sourceLimit = saveSourceLimit;
01224 args->source += (pLMBCS - LMBCS - size_old);
01225
01226 if (*err == U_TRUNCATED_CHAR_FOUND && !args->flush)
01227 {
01228
01229 int8_t savebytes = (int8_t)(size_old+size_new);
01230 args->converter->invalidCharLength = savebytes;
01231 uprv_memcpy(args->converter->invalidCharBuffer, LMBCS, savebytes);
01232 args->source = args->sourceLimit;
01233 *err = U_ZERO_ERROR;
01234 return;
01235 }
01236 else
01237 {
01238
01239 args->converter->invalidCharLength = 0;
01240 }
01241 }
01242 else
01243 {
01244 uniChar = (UChar) _LMBCSGetNextUCharWorker(args, err, FALSE);
01245 }
01246 if (U_SUCCESS(*err))
01247 {
01248 if (uniChar < 0xfffe)
01249 {
01250 *(args->target)++ = uniChar;
01251 if(args->offsets)
01252 {
01253 *(args->offsets)++ = saveSource - pStartLMBCS;
01254 }
01255 }
01256 else if (uniChar == 0xfffe)
01257 {
01258 *err = U_INVALID_CHAR_FOUND;
01259 }
01260 else
01261 {
01262 *err = U_ILLEGAL_CHAR_FOUND;
01263 }
01264 }
01265 }
01266
01267 if (U_SUCCESS(*err) && args->sourceLimit > args->source && args->targetLimit <= args->target)
01268 {
01269 *err = U_BUFFER_OVERFLOW_ERROR;
01270 }
01271
01272
01273 if ((*err == U_TRUNCATED_CHAR_FOUND) && !args->flush )
01274 {
01275 int8_t savebytes = (int8_t)(args->sourceLimit - saveSource);
01276 args->converter->invalidCharLength = (int8_t)savebytes;
01277 uprv_memcpy(args->converter->invalidCharBuffer, saveSource, savebytes);
01278 args->source = args->sourceLimit;
01279 *err = U_ZERO_ERROR;
01280 }
01281 }
01282
01283
01284
01285