00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022 #include "cmemory.h"
00023 #include "unicode/utypes.h"
00024 #include "ucmp16.h"
00025 #include "ucmp8.h"
00026 #include "unicode/ucnv_err.h"
00027 #include "ucnv_bld.h"
00028 #include "unicode/ucnv.h"
00029 #include "ucnv_cnv.h"
00030
00031
00032
00033
00034
00035
00036
/* Upper bounds of the value ranges handled by the converters in this file. */
static const uint32_t MAXIMUM_UCS2 = 0xFFFF;      /* largest value stored in a single UChar */
static const uint32_t MAXIMUM_UTF  = 0x10FFFF;    /* largest value accepted by the toUnicode routines */
static const uint32_t MAXIMUM_UCS4 = 0x7FFFFFFF;

/* Building blocks for splitting/joining surrogate pairs. */
static const int8_t   HALF_SHIFT = 10;            /* payload bits carried by each surrogate */
static const uint32_t HALF_BASE  = 0x10000;       /* first value that needs a surrogate pair */
static const uint32_t HALF_MASK  = 0x3FF;         /* mask for the trail surrogate payload */
static const uint32_t SURROGATE_HIGH_START = 0xD800;
static const uint32_t SURROGATE_HIGH_END   = 0xDBFF;
static const uint32_t SURROGATE_LOW_START  = 0xDC00;
static const uint32_t SURROGATE_LOW_END    = 0xDFFF;
/* 9216 == HALF_BASE - SURROGATE_LOW_START; added when joining a pair. */
static const uint32_t SURROGATE_LOW_BASE   = 0x2400;

/*
 * Indexed by the total length of a UTF-8 sequence (1..6).  The entry is
 * subtracted from the raw accumulation of all bytes of the sequence to
 * remove the lead/trail marker bits.  Index 0 is a placeholder.
 */
static const uint32_t offsetsFromUTF8[7] = {
    0,
    (uint32_t) 0x00000000,
    (uint32_t) 0x00003080,
    (uint32_t) 0x000E2080,
    (uint32_t) 0x03C82080,
    (uint32_t) 0xFA082080,
    (uint32_t) 0x82082080
};

/*
 * Indexed by the first byte of a UTF-8 sequence: total number of bytes in
 * the sequence, or 0 for a byte that cannot start a sequence.
 */
static const int8_t bytesFromUTF8[256] = {
    /* 0x00 - 0x7F: single byte */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x80 - 0xBF: trail bytes, never a lead */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xC0 - 0xDF: two bytes */
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* 0xE0 - 0xFF: three, four, five, six bytes; 0xFE and 0xFF are invalid */
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 0, 0
};

/* True when the byte does not have the 10xxxxxx shape of a UTF-8 trail byte. */
#define INVALID_UTF8_TAIL(utf8) (((utf8) & 0xC0) != 0x80)
00070
00077 static UBool
00078 T_UConverter_toUnicode_InvalidChar_Callback(UConverterToUnicodeArgs * args,
00079 UErrorCode *err)
00080 {
00081 UConverter *converter = args->converter;
00082
00083 if (U_SUCCESS(*err))
00084 {
00085 *err = U_ILLEGAL_CHAR_FOUND;
00086 }
00087
00088
00089 uprv_memcpy(converter->invalidCharBuffer,
00090 converter->toUBytes,
00091 converter->invalidCharLength);
00092
00093
00094 args->converter->fromCharErrorBehaviour(converter->toUContext,
00095 args,
00096 converter->invalidCharBuffer,
00097 converter->invalidCharLength,
00098 UCNV_ILLEGAL,
00099 err);
00100
00101 return (UBool)U_FAILURE(*err);
00102 }
00103
00104 static UBool
00105 T_UConverter_toUnicode_InvalidChar_OffsetCallback(UConverterToUnicodeArgs * args,
00106 int32_t currentOffset,
00107 UErrorCode *err)
00108 {
00109 int32_t *saveOffsets = args->offsets;
00110 UBool result;
00111
00112 result = T_UConverter_toUnicode_InvalidChar_Callback(args, err);
00113
00114 while (saveOffsets < args->offsets)
00115 {
00116 *(saveOffsets++) = currentOffset;
00117 }
00118 return result;
00119 }
00120
00121 U_CFUNC void T_UConverter_toUnicode_UTF8 (UConverterToUnicodeArgs * args,
00122 UErrorCode * err)
00123 {
00124 const unsigned char *mySource = (unsigned char *) args->source;
00125 UChar *myTarget = args->target;
00126 const unsigned char *sourceLimit = (unsigned char *) args->sourceLimit;
00127 const UChar *targetLimit = args->targetLimit;
00128 unsigned char *toUBytes = args->converter->toUBytes;
00129 UBool invalidTailChar = FALSE;
00130 uint32_t ch, ch2 = 0, i;
00131 uint32_t inBytes;
00132
00133 if (U_FAILURE(*err))
00134 {
00135 return;
00136 }
00137
00138
00139 if (args->converter->toUnicodeStatus && myTarget < targetLimit)
00140 {
00141 inBytes = args->converter->toULength;
00142 i = args->converter->invalidCharLength;
00143
00144 ch = args->converter->toUnicodeStatus;
00145 args->converter->toUnicodeStatus = 0;
00146 goto morebytes;
00147 }
00148
00149
00150 while (mySource < sourceLimit && myTarget < targetLimit)
00151 {
00152 ch = *(mySource++);
00153 if (ch < 0x80)
00154 {
00155 *(myTarget++) = (UChar) ch;
00156 }
00157 else
00158 {
00159
00160 toUBytes[0] = (char)ch;
00161 inBytes = bytesFromUTF8[ch];
00162 i = 1;
00163
00164 morebytes:
00165 while (i < inBytes)
00166 {
00167 if (mySource < sourceLimit)
00168 {
00169 toUBytes[i] = (char) (ch2 = *(mySource++));
00170 if (INVALID_UTF8_TAIL(ch2))
00171 {
00172 *err = U_TRUNCATED_CHAR_FOUND;
00173 invalidTailChar = TRUE;
00174 break;
00175 }
00176 ch = (ch << 6) + ch2;
00177 i++;
00178 }
00179 else
00180 {
00181 if (args->flush)
00182 {
00183 if (U_SUCCESS(*err))
00184 {
00185 *err = U_TRUNCATED_CHAR_FOUND;
00186 }
00187 }
00188 else
00189 {
00190 args->converter->toUnicodeStatus = ch;
00191 args->converter->toULength = (int8_t) inBytes;
00192 args->converter->invalidCharLength = (int8_t) i;
00193 }
00194 goto donefornow;
00195 }
00196 }
00197
00198
00199 ch -= offsetsFromUTF8[inBytes];
00200
00201 if (i == inBytes && ch <= MAXIMUM_UTF)
00202 {
00203
00204 if (ch <= MAXIMUM_UCS2)
00205 {
00206
00207 *(myTarget++) = (UChar) ch;
00208 }
00209 else
00210 {
00211
00212 ch -= HALF_BASE;
00213 *(myTarget++) = (UChar) ((ch >> HALF_SHIFT) + SURROGATE_HIGH_START);
00214 ch = (ch & HALF_MASK) + SURROGATE_LOW_START;
00215 if (myTarget < targetLimit)
00216 {
00217 *(myTarget++) = (UChar)ch;
00218 }
00219 else
00220 {
00221
00222 args->converter->UCharErrorBuffer[0] = (UChar) ch;
00223 args->converter->UCharErrorBufferLength = 1;
00224 *err = U_BUFFER_OVERFLOW_ERROR;
00225 break;
00226 }
00227 }
00228 }
00229 else
00230 {
00231 args->source = (const char *) mySource;
00232 args->target = myTarget;
00233 args->converter->invalidCharLength = (int8_t)i;
00234 if (T_UConverter_toUnicode_InvalidChar_Callback(args, err))
00235 {
00236
00237 break;
00238 }
00239 args->converter->invalidCharLength = 0;
00240 mySource = (unsigned char *) args->source;
00241 myTarget = args->target;
00242 if (invalidTailChar)
00243 {
00244
00245 if (myTarget < targetLimit)
00246 {
00247 *(myTarget++) = (UChar) ch2;
00248 invalidTailChar = FALSE;
00249 }
00250 else
00251 {
00252
00253 args->converter->UCharErrorBuffer[0] = (UChar) ch2;
00254 args->converter->UCharErrorBufferLength = 1;
00255 *err = U_BUFFER_OVERFLOW_ERROR;
00256 break;
00257 }
00258 }
00259 }
00260 }
00261 }
00262
00263 donefornow:
00264 if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err))
00265 {
00266
00267 *err = U_BUFFER_OVERFLOW_ERROR;
00268 }
00269
00270 args->target = myTarget;
00271 args->source = (const char *) mySource;
00272 }
00273
00274 U_CFUNC void T_UConverter_toUnicode_UTF8_OFFSETS_LOGIC (UConverterToUnicodeArgs * args,
00275 UErrorCode * err)
00276 {
00277 const unsigned char *mySource = (unsigned char *) args->source;
00278 UChar *myTarget = args->target;
00279 int32_t *myOffsets = args->offsets;
00280 int32_t offsetNum = 0;
00281 const unsigned char *sourceLimit = (unsigned char *) args->sourceLimit;
00282 const UChar *targetLimit = args->targetLimit;
00283 unsigned char *toUBytes = args->converter->toUBytes;
00284 UBool invalidTailChar = FALSE;
00285 uint32_t ch, ch2 = 0, i;
00286 uint32_t inBytes;
00287
00288
00289 if (args->converter->toUnicodeStatus && myTarget < targetLimit)
00290 {
00291 inBytes = args->converter->toULength;
00292 i = args->converter->invalidCharLength;
00293
00294 ch = args->converter->toUnicodeStatus;
00295 args->converter->toUnicodeStatus = 0;
00296 goto morebytes;
00297 }
00298
00299 while (mySource < sourceLimit && myTarget < targetLimit)
00300 {
00301 ch = *(mySource++);
00302 if (ch < 0x80)
00303 {
00304 *(myTarget++) = (UChar) ch;
00305 *(myOffsets++) = offsetNum++;
00306 }
00307 else
00308 {
00309 toUBytes[0] = (char)ch;
00310 inBytes = bytesFromUTF8[ch];
00311 i = 1;
00312
00313 morebytes:
00314 while (i < inBytes)
00315 {
00316 if (mySource < sourceLimit)
00317 {
00318 toUBytes[i] = (char) (ch2 = *(mySource++));
00319 if (INVALID_UTF8_TAIL(ch2))
00320 {
00321 *err = U_TRUNCATED_CHAR_FOUND;
00322 invalidTailChar = TRUE;
00323 break;
00324 }
00325 ch = (ch << 6) + ch2;
00326 i++;
00327 }
00328 else
00329 {
00330 if (args->flush)
00331 {
00332 if (U_SUCCESS(*err))
00333 {
00334 *err = U_TRUNCATED_CHAR_FOUND;
00335 args->converter->toUnicodeStatus = 0;
00336 }
00337 }
00338 else
00339 {
00340 args->converter->toUnicodeStatus = ch;
00341 args->converter->toULength = (int8_t)inBytes;
00342 args->converter->invalidCharLength = (int8_t)i;
00343 }
00344 goto donefornow;
00345 }
00346 }
00347
00348
00349 ch -= offsetsFromUTF8[inBytes];
00350
00351 if (i == inBytes && ch <= MAXIMUM_UTF)
00352 {
00353
00354 if (ch <= MAXIMUM_UCS2)
00355 {
00356
00357 *(myTarget++) = (UChar) ch;
00358 *(myOffsets++) = offsetNum;
00359 }
00360 else
00361 {
00362
00363 *(myOffsets++) = offsetNum;
00364 ch -= HALF_BASE;
00365 *(myTarget++) = (UChar) ((ch >> HALF_SHIFT) + SURROGATE_HIGH_START);
00366 ch = (ch & HALF_MASK) + SURROGATE_LOW_START;
00367 if (myTarget < targetLimit)
00368 {
00369 *(myTarget++) = (UChar)ch;
00370 *(myOffsets++) = offsetNum;
00371 }
00372 else
00373 {
00374 args->converter->UCharErrorBuffer[0] = (UChar) ch;
00375 args->converter->UCharErrorBufferLength = 1;
00376 *err = U_BUFFER_OVERFLOW_ERROR;
00377 }
00378 }
00379 offsetNum += i;
00380 }
00381 else
00382 {
00383 UBool useOffset;
00384
00385 args->source = (const char *) mySource;
00386 args->target = myTarget;
00387 args->offsets = myOffsets;
00388 args->converter->invalidCharLength = (int8_t)i;
00389 if (T_UConverter_toUnicode_InvalidChar_OffsetCallback(args,
00390 offsetNum, err))
00391 {
00392
00393 break;
00394 }
00395
00396 args->converter->invalidCharLength = 0;
00397 mySource = (unsigned char *) args->source;
00398 myTarget = args->target;
00399
00400 useOffset = (UBool)(myOffsets != args->offsets);
00401 myOffsets = args->offsets;
00402 offsetNum += i;
00403
00404 if (invalidTailChar)
00405 {
00406
00407 if (myTarget < targetLimit)
00408 {
00409 *(myTarget++) = (UChar) ch2;
00410 *myOffsets = offsetNum++;
00411 if (useOffset)
00412 {
00413
00414 myOffsets++;
00415 }
00416 invalidTailChar = FALSE;
00417 }
00418 else
00419 {
00420
00421 args->converter->UCharErrorBuffer[0] = (UChar) ch2;
00422 args->converter->UCharErrorBufferLength = 1;
00423 *err = U_BUFFER_OVERFLOW_ERROR;
00424 break;
00425 }
00426 }
00427 }
00428 }
00429 }
00430
00431 donefornow:
00432 if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err))
00433 {
00434 *err = U_BUFFER_OVERFLOW_ERROR;
00435 }
00436
00437 args->target = myTarget;
00438 args->source = (const char *) mySource;
00439 args->offsets = myOffsets;
00440 }
00441
00442 U_CFUNC void T_UConverter_fromUnicode_UTF8 (UConverterFromUnicodeArgs * args,
00443 UErrorCode * err)
00444 {
00445 const UChar *mySource = args->source;
00446 unsigned char *myTarget = (unsigned char *) args->target;
00447 const UChar *sourceLimit = args->sourceLimit;
00448 const unsigned char *targetLimit = (unsigned char *) args->targetLimit;
00449 uint32_t ch, ch2;
00450 int16_t indexToWrite;
00451 char temp[4];
00452
00453 if (args->converter->fromUnicodeStatus && myTarget < targetLimit)
00454 {
00455 ch = args->converter->fromUnicodeStatus;
00456 args->converter->fromUnicodeStatus = 0;
00457 goto lowsurogate;
00458 }
00459
00460 while (mySource < sourceLimit && myTarget < targetLimit)
00461 {
00462 ch = *(mySource++);
00463
00464 if (ch < 0x80)
00465 {
00466 *(myTarget++) = (char) ch;
00467 }
00468 else if (ch < 0x800)
00469 {
00470 *(myTarget++) = (char) ((ch >> 6) | 0xc0);
00471 if (myTarget < targetLimit)
00472 {
00473 *(myTarget++) = (char) ((ch & 0x3f) | 0x80);
00474 }
00475 else
00476 {
00477 args->converter->charErrorBuffer[0] = (char) ((ch & 0x3f) | 0x80);
00478 args->converter->charErrorBufferLength = 1;
00479 *err = U_BUFFER_OVERFLOW_ERROR;
00480 }
00481 }
00482 else
00483
00484 {
00485 if ((ch >= SURROGATE_HIGH_START) && (ch <= SURROGATE_HIGH_END))
00486 {
00487 lowsurogate:
00488 if (mySource < sourceLimit)
00489 {
00490 ch2 = *mySource;
00491 if ((ch2 >= SURROGATE_LOW_START) && (ch2 <= SURROGATE_LOW_END))
00492 {
00493
00494 ch = ((ch - SURROGATE_HIGH_START) << HALF_SHIFT) + ch2 + SURROGATE_LOW_BASE;
00495 mySource++;
00496 }
00497 }
00498 else if (!args->flush)
00499 {
00500 args->converter->fromUnicodeStatus = ch;
00501 break;
00502 }
00503 }
00504
00505 if (ch < 0x10000)
00506 {
00507 indexToWrite = 2;
00508 temp[2] = (char) ((ch >> 12) | 0xe0);
00509 }
00510 else
00511 {
00512 indexToWrite = 3;
00513 temp[3] = (char) ((ch >> 18) | 0xf0);
00514 temp[2] = (char) (((ch >> 12) & 0x3f) | 0x80);
00515 }
00516 temp[1] = (char) (((ch >> 6) & 0x3f) | 0x80);
00517 temp[0] = (char) ((ch & 0x3f) | 0x80);
00518
00519 for (; indexToWrite >= 0; indexToWrite--)
00520 {
00521 if (myTarget < targetLimit)
00522 {
00523 *(myTarget++) = temp[indexToWrite];
00524 }
00525 else
00526 {
00527 args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = temp[indexToWrite];
00528 *err = U_BUFFER_OVERFLOW_ERROR;
00529 }
00530 }
00531 }
00532 }
00533
00534 if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err))
00535 {
00536 *err = U_BUFFER_OVERFLOW_ERROR;
00537 }
00538
00539 args->target = (char *) myTarget;
00540 args->source = mySource;
00541 }
00542
00543 U_CFUNC void T_UConverter_fromUnicode_UTF8_OFFSETS_LOGIC (UConverterFromUnicodeArgs * args,
00544 UErrorCode * err)
00545 {
00546 const UChar *mySource = args->source;
00547 unsigned char *myTarget = (unsigned char *) args->target;
00548 int32_t *myOffsets = args->offsets;
00549 const UChar *sourceLimit = args->sourceLimit;
00550 const unsigned char *targetLimit = (unsigned char *) args->targetLimit;
00551 uint32_t ch, ch2;
00552 int32_t offsetNum = 0;
00553 int16_t indexToWrite;
00554 char temp[4];
00555
00556 if (args->converter->fromUnicodeStatus && myTarget < targetLimit)
00557 {
00558 ch = args->converter->fromUnicodeStatus;
00559 args->converter->fromUnicodeStatus = 0;
00560 goto lowsurogate;
00561 }
00562
00563 while (mySource < sourceLimit && myTarget < targetLimit)
00564 {
00565 ch = *(mySource++);
00566
00567 if (ch < 0x80)
00568 {
00569 *(myOffsets++) = offsetNum++;
00570 *(myTarget++) = (char) ch;
00571 }
00572 else if (ch < 0x800)
00573 {
00574 *(myOffsets++) = offsetNum;
00575 *(myTarget++) = (char) ((ch >> 6) | 0xc0);
00576 if (myTarget < targetLimit)
00577 {
00578 *(myOffsets++) = offsetNum++;
00579 *(myTarget++) = (char) ((ch & 0x3f) | 0x80);
00580 }
00581 else
00582 {
00583 args->converter->charErrorBuffer[0] = (char) ((ch & 0x3f) | 0x80);
00584 args->converter->charErrorBufferLength = 1;
00585 *err = U_BUFFER_OVERFLOW_ERROR;
00586 }
00587 }
00588 else
00589
00590 {
00591 if ((ch >= SURROGATE_HIGH_START) && (ch <= SURROGATE_HIGH_END))
00592 {
00593 lowsurogate:
00594 if (mySource < sourceLimit)
00595 {
00596 ch2 = *mySource;
00597 if ((ch2 >= SURROGATE_LOW_START) && (ch2 <= SURROGATE_LOW_END))
00598 {
00599
00600 ch = ((ch - SURROGATE_HIGH_START) << HALF_SHIFT) + ch2 + SURROGATE_LOW_BASE;
00601 mySource++;
00602 }
00603 }
00604 else if (!args->flush)
00605 {
00606 args->converter->fromUnicodeStatus = ch;
00607 break;
00608 }
00609 }
00610
00611 if (ch < 0x10000)
00612 {
00613 indexToWrite = 2;
00614 temp[2] = (char) ((ch >> 12) | 0xe0);
00615 }
00616 else
00617 {
00618 indexToWrite = 3;
00619 temp[3] = (char) ((ch >> 18) | 0xf0);
00620 temp[2] = (char) (((ch >> 12) & 0x3f) | 0x80);
00621 }
00622 temp[1] = (char) (((ch >> 6) & 0x3f) | 0x80);
00623 temp[0] = (char) ((ch & 0x3f) | 0x80);
00624
00625 for (; indexToWrite >= 0; indexToWrite--)
00626 {
00627 if (myTarget < targetLimit)
00628 {
00629 *(myOffsets++) = offsetNum;
00630 *(myTarget++) = temp[indexToWrite];
00631 }
00632 else
00633 {
00634 args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = temp[indexToWrite];
00635 *err = U_BUFFER_OVERFLOW_ERROR;
00636 }
00637 }
00638 offsetNum += (ch >= 0x10000) + 1;
00639 }
00640 }
00641
00642 if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err))
00643 {
00644 *err = U_BUFFER_OVERFLOW_ERROR;
00645 }
00646
00647 args->target = (char *) myTarget;
00648 args->source = mySource;
00649 }
00650
00651 U_CFUNC UChar32 T_UConverter_getNextUChar_UTF8(UConverterToUnicodeArgs *args,
00652 UErrorCode* err)
00653 {
00654
00655 char const *sourceInitial = args->source;
00656 uint16_t extraBytesToWrite;
00657 uint8_t myByte;
00658 UChar32 ch;
00659 int8_t isLegalSequence = 1;
00660
00661
00662 if (args->source >= args->sourceLimit)
00663 {
00664 *err = U_INDEX_OUTOFBOUNDS_ERROR;
00665 return 0xffff;
00666 }
00667
00668 myByte = (uint8_t)*(args->source++);
00669 if (myByte < 0x80)
00670 {
00671 return (UChar32)myByte;
00672 }
00673 extraBytesToWrite = (uint16_t)bytesFromUTF8[myByte];
00674 if (extraBytesToWrite == 0) {
00675 goto CALL_ERROR_FUNCTION;
00676 }
00677
00678
00679
00680 if ((args->source + extraBytesToWrite - 1) > args->sourceLimit)
00681 {
00682 *err = U_TRUNCATED_CHAR_FOUND;
00683 return 0xffff;
00684 }
00685 else
00686 {
00687 ch = myByte << 6;
00688 switch(extraBytesToWrite)
00689 {
00690
00691 case 6:
00692 ch += (myByte = (uint8_t)*(args->source++));
00693 ch <<= 6;
00694 if ((myByte & 0xC0) != 0x80)
00695 {
00696 isLegalSequence = 0;
00697 break;
00698 }
00699 case 5:
00700 ch += (myByte = *(args->source++));
00701 ch <<= 6;
00702 if ((myByte & 0xC0) != 0x80)
00703 {
00704 isLegalSequence = 0;
00705 break;
00706 }
00707 case 4:
00708 ch += (myByte = *(args->source++));
00709 ch <<= 6;
00710 if ((myByte & 0xC0) != 0x80)
00711 {
00712 isLegalSequence = 0;
00713 break;
00714 }
00715 case 3:
00716 ch += (myByte = *(args->source++));
00717 ch <<= 6;
00718 if ((myByte & 0xC0) != 0x80)
00719 {
00720 isLegalSequence = 0;
00721 break;
00722 }
00723 case 2:
00724 ch += (myByte = *(args->source++));
00725 if ((myByte & 0xC0) != 0x80)
00726 {
00727 isLegalSequence = 0;
00728 }
00729 };
00730 }
00731 ch -= offsetsFromUTF8[extraBytesToWrite];
00732
00733 if (isLegalSequence)
00734 return ch;
00735
00736 CALL_ERROR_FUNCTION:
00737 {
00738 UChar myUChar = (UChar)0xffff;
00739 UChar* myUCharPtr = &myUChar;
00740
00741 *err = U_ILLEGAL_CHAR_FOUND;
00742
00743
00744
00745 args->target = myUCharPtr;
00746 args->targetLimit = myUCharPtr + 1;
00747 args->converter->fromCharErrorBehaviour(args->converter->toUContext,
00748 args,
00749 sourceInitial,
00750 args->source-sourceInitial,
00751 UCNV_ILLEGAL,
00752 err);
00753
00754
00755 if (*err == U_BUFFER_OVERFLOW_ERROR)
00756 *err = U_ZERO_ERROR;
00757
00758 return (UChar32)myUChar;
00759 }
00760 }
00761
00762 static const UConverterImpl _UTF8Impl={
00763 UCNV_UTF8,
00764
00765 NULL,
00766 NULL,
00767
00768 NULL,
00769 NULL,
00770 NULL,
00771
00772 T_UConverter_toUnicode_UTF8,
00773 T_UConverter_toUnicode_UTF8_OFFSETS_LOGIC,
00774 T_UConverter_fromUnicode_UTF8,
00775 T_UConverter_fromUnicode_UTF8_OFFSETS_LOGIC,
00776 T_UConverter_getNextUChar_UTF8,
00777
00778 NULL,
00779 NULL
00780 };
00781
00782
00783 const UConverterStaticData _UTF8StaticData={
00784 sizeof(UConverterStaticData),
00785 "UTF8",
00786 1208, UCNV_IBM, UCNV_UTF8, 1, 4,
00787 { 0xef, 0xbf, 0xbd, 0 },3,FALSE,FALSE,{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}
00788 };
00789
00790
00791 const UConverterSharedData _UTF8Data={
00792 sizeof(UConverterSharedData), ~((uint32_t) 0),
00793 NULL, NULL, &_UTF8StaticData, FALSE, &_UTF8Impl,
00794 0
00795 };
00796
00797
00798
00799 U_CFUNC void T_UConverter_toUnicode_UTF16_BE (UConverterToUnicodeArgs * args,
00800 UErrorCode * err)
00801 {
00802 const unsigned char *mySource = (unsigned char *) args->source;
00803 UChar *myTarget = args->target;
00804 int32_t mySourceIndex = 0;
00805 int32_t myTargetIndex = 0;
00806 int32_t targetLength = args->targetLimit - myTarget;
00807 int32_t sourceLength = args->sourceLimit - (char *) mySource;
00808 UChar mySourceChar = 0x0000;
00809 UChar oldmySourceChar = 0x0000;
00810
00811 while (mySourceIndex < sourceLength)
00812 {
00813 if (myTargetIndex < targetLength)
00814 {
00815
00816 mySourceChar = (unsigned char) mySource[mySourceIndex++];
00817 oldmySourceChar = mySourceChar;
00818 if (args->converter->toUnicodeStatus == 0)
00819 {
00820 args->converter->toUnicodeStatus =
00821 (unsigned char) mySourceChar == 0 ? 0xFFFF : mySourceChar;
00822 }
00823 else
00824 {
00825 if (args->converter->toUnicodeStatus != 0xFFFF)
00826 mySourceChar = (UChar) ((args->converter->toUnicodeStatus << 8) | mySourceChar);
00827 args->converter->toUnicodeStatus = 0;
00828
00829 myTarget[myTargetIndex++] = mySourceChar;
00830 }
00831 }
00832 else
00833 {
00834 *err = U_BUFFER_OVERFLOW_ERROR;
00835 break;
00836 }
00837 }
00838
00839 if (U_SUCCESS(*err) && args->flush
00840 && (mySourceIndex == sourceLength)
00841 && (args->converter->toUnicodeStatus != 0x00))
00842 {
00843 if (U_SUCCESS(*err))
00844 {
00845 *err = U_TRUNCATED_CHAR_FOUND;
00846 args->converter->toUnicodeStatus = 0x00;
00847 }
00848 }
00849
00850 args->target += myTargetIndex;
00851 args->source += mySourceIndex;
00852 }
00853
00854 U_CFUNC void T_UConverter_fromUnicode_UTF16_BE (UConverterFromUnicodeArgs * args,
00855 UErrorCode * err)
00856 {
00857 const UChar *mySource = args->source;
00858 unsigned char *myTarget = (unsigned char *) args->target;
00859 int32_t mySourceIndex = 0;
00860 int32_t myTargetIndex = 0;
00861 int32_t targetLength = args->targetLimit - (char *) myTarget;
00862 int32_t sourceLength = args->sourceLimit - mySource;
00863 UChar mySourceChar;
00864
00865
00866 while (mySourceIndex < sourceLength)
00867 {
00868 if (myTargetIndex < targetLength)
00869 {
00870 mySourceChar = (UChar) mySource[mySourceIndex++];
00871 myTarget[myTargetIndex++] = (char) (mySourceChar >> 8);
00872 if (myTargetIndex < targetLength)
00873 {
00874 myTarget[myTargetIndex++] = (char) mySourceChar;
00875 }
00876 else
00877 {
00878 args->converter->charErrorBuffer[0] = (char) mySourceChar;
00879 args->converter->charErrorBufferLength = 1;
00880 *err = U_BUFFER_OVERFLOW_ERROR;
00881 }
00882 }
00883 else
00884 {
00885 *err = U_BUFFER_OVERFLOW_ERROR;
00886 break;
00887 }
00888 }
00889
00890 args->target += myTargetIndex;
00891 args->source += mySourceIndex;
00892 }
00893
00894 U_CFUNC UChar32 T_UConverter_getNextUChar_UTF16_BE(UConverterToUnicodeArgs* args,
00895 UErrorCode* err)
00896 {
00897 UChar32 myUChar;
00898 uint16_t first;
00899
00900 if (args->source+2 > args->sourceLimit)
00901 {
00902 if (args->source >= args->sourceLimit)
00903 {
00904
00905 *err = U_INDEX_OUTOFBOUNDS_ERROR;
00906 }
00907 else
00908 {
00909
00910 *err = U_TRUNCATED_CHAR_FOUND;
00911 }
00912 return 0xffff;
00913 }
00914
00915
00916 first = (uint16_t)(((uint16_t)(*(args->source)) << 8) |((uint8_t)*((args->source)+1)));
00917 myUChar = first;
00918 args->source += 2;
00919
00920 if(UTF_IS_FIRST_SURROGATE(first)) {
00921 uint16_t second;
00922
00923 if (args->source+2 > args->sourceLimit) {
00924 *err = U_TRUNCATED_CHAR_FOUND;
00925 return 0xffff;
00926 }
00927
00928
00929 second = (uint16_t)(((uint16_t)(*(args->source)) << 8) |((uint8_t)*(args->source+1)));
00930
00931
00932 if(UTF_IS_SECOND_SURROGATE(second)) {
00933
00934 myUChar = UTF16_GET_PAIR_VALUE(first, second);
00935 args->source += 2;
00936 }
00937 }
00938
00939 return myUChar;
00940 }
00941
00942 static const UConverterImpl _UTF16BEImpl={
00943 UCNV_UTF16_BigEndian,
00944
00945 NULL,
00946 NULL,
00947
00948 NULL,
00949 NULL,
00950 NULL,
00951
00952 T_UConverter_toUnicode_UTF16_BE,
00953 NULL,
00954 T_UConverter_fromUnicode_UTF16_BE,
00955 NULL,
00956 T_UConverter_getNextUChar_UTF16_BE,
00957
00958 NULL,
00959 NULL
00960 };
00961
00962
00963 const UConverterStaticData _UTF16BEStaticData={
00964 sizeof(UConverterStaticData),
00965 "UTF16_BigEndian",
00966 1200, UCNV_IBM, UCNV_UTF16_BigEndian, 2, 2,
00967 { 0xff, 0xfd, 0, 0 },2,FALSE,FALSE,{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}
00968 };
00969
00970
00971 const UConverterSharedData _UTF16BEData={
00972 sizeof(UConverterSharedData), ~((uint32_t) 0),
00973 NULL, NULL, &_UTF16BEStaticData, FALSE, &_UTF16BEImpl,
00974 0
00975 };
00976
00977
00978
00979 U_CFUNC void T_UConverter_toUnicode_UTF16_LE (UConverterToUnicodeArgs * args,
00980 UErrorCode * err)
00981 {
00982 const unsigned char *mySource = (unsigned char *) args->source;
00983 UChar *myTarget = args->target;
00984 int32_t mySourceIndex = 0;
00985 int32_t myTargetIndex = 0;
00986 int32_t targetLength = args->targetLimit - myTarget;
00987 int32_t sourceLength = args->sourceLimit - (char *) mySource;
00988 UChar mySourceChar = 0x0000;
00989
00990 while (mySourceIndex < sourceLength)
00991 {
00992 if (myTargetIndex < targetLength)
00993 {
00994
00995 mySourceChar = (unsigned char) mySource[mySourceIndex++];
00996
00997 if (args->converter->toUnicodeStatus == 0x00)
00998 {
00999 args->converter->toUnicodeStatus = (unsigned char) mySourceChar == 0x00 ? 0xFFFF : mySourceChar;
01000 }
01001 else
01002 {
01003 if (args->converter->toUnicodeStatus == 0xFFFF) {
01004 mySourceChar = (UChar) (mySourceChar << 8);
01005 }
01006 else
01007 {
01008 mySourceChar <<= 8;
01009 mySourceChar |= (UChar) (args->converter->toUnicodeStatus);
01010 }
01011 args->converter->toUnicodeStatus = 0x00;
01012 myTarget[myTargetIndex++] = mySourceChar;
01013 }
01014 }
01015 else
01016 {
01017 *err = U_BUFFER_OVERFLOW_ERROR;
01018 break;
01019 }
01020 }
01021
01022
01023 if (U_SUCCESS(*err) && args->flush
01024 && (mySourceIndex == sourceLength)
01025 && (args->converter->toUnicodeStatus != 0x00))
01026 {
01027 if (U_SUCCESS(*err))
01028 {
01029 *err = U_TRUNCATED_CHAR_FOUND;
01030 args->converter->toUnicodeStatus = 0x00;
01031 }
01032 }
01033
01034 args->target += myTargetIndex;
01035 args->source += mySourceIndex;
01036 }
01037
01038 U_CFUNC void T_UConverter_fromUnicode_UTF16_LE (UConverterFromUnicodeArgs * args,
01039 UErrorCode * err)
01040 {
01041 const UChar *mySource = args->source;
01042 unsigned char *myTarget = (unsigned char *) args->target;
01043 int32_t mySourceIndex = 0;
01044 int32_t myTargetIndex = 0;
01045 int32_t targetLength = args->targetLimit - (char *) myTarget;
01046 int32_t sourceLength = args->sourceLimit - mySource;
01047 UChar mySourceChar;
01048
01049
01050 while (mySourceIndex < sourceLength)
01051 {
01052 if (myTargetIndex < targetLength)
01053 {
01054 mySourceChar = (UChar) mySource[mySourceIndex++];
01055 myTarget[myTargetIndex++] = (char) mySourceChar;
01056 if (myTargetIndex < targetLength)
01057 {
01058 myTarget[myTargetIndex++] = (char) (mySourceChar >> 8);
01059 }
01060 else
01061 {
01062 args->converter->charErrorBuffer[0] = (char) (mySourceChar >> 8);
01063 args->converter->charErrorBufferLength = 1;
01064 *err = U_BUFFER_OVERFLOW_ERROR;
01065 }
01066 }
01067 else
01068 {
01069 *err = U_BUFFER_OVERFLOW_ERROR;
01070 break;
01071 }
01072 }
01073
01074 args->target += myTargetIndex;
01075 args->source += mySourceIndex;
01076 }
01077
01078 U_CFUNC UChar32 T_UConverter_getNextUChar_UTF16_LE(UConverterToUnicodeArgs* args,
01079 UErrorCode* err)
01080 {
01081 UChar32 myUChar;
01082 uint16_t first;
01083
01084 if (args->source+2 > args->sourceLimit)
01085 {
01086 if (args->source >= args->sourceLimit)
01087 {
01088
01089 *err = U_INDEX_OUTOFBOUNDS_ERROR;
01090 }
01091 else
01092 {
01093
01094 *err = U_TRUNCATED_CHAR_FOUND;
01095 }
01096
01097 return 0xffff;
01098 }
01099
01100
01101 first = (uint16_t)(((uint16_t)*((args->source)+1) << 8) | ((uint8_t)(*(args->source))));
01102 myUChar=first;
01103
01104 args->source += 2;
01105
01106 if (UTF_IS_FIRST_SURROGATE(first))
01107 {
01108 uint16_t second;
01109
01110 if (args->source+2 > args->sourceLimit)
01111 {
01112 *err = U_TRUNCATED_CHAR_FOUND;
01113 return 0xffff;
01114 }
01115
01116
01117 second = (uint16_t)(((uint16_t)*(args->source+1) << 8) |((uint8_t)(*(args->source))));
01118
01119
01120 if(UTF_IS_SECOND_SURROGATE(second))
01121 {
01122
01123 myUChar = UTF16_GET_PAIR_VALUE(first, second);
01124 args->source += 2;
01125 }
01126 }
01127
01128 return myUChar;
01129 }
01130
01131 static const UConverterImpl _UTF16LEImpl={
01132 UCNV_UTF16_LittleEndian,
01133
01134 NULL,
01135 NULL,
01136
01137 NULL,
01138 NULL,
01139 NULL,
01140
01141 T_UConverter_toUnicode_UTF16_LE,
01142 NULL,
01143 T_UConverter_fromUnicode_UTF16_LE,
01144 NULL,
01145 T_UConverter_getNextUChar_UTF16_LE,
01146
01147 NULL,
01148 NULL
01149 };
01150
01151
01152
01153 const UConverterStaticData _UTF16LEStaticData={
01154 sizeof(UConverterStaticData),
01155 "UTF16_LittleEndian",
01156 1200, UCNV_IBM, UCNV_UTF16_LittleEndian, 2, 2,
01157 { 0xfd, 0xff, 0, 0 },2,0,0,{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}
01158 };
01159
01160
01161 const UConverterSharedData _UTF16LEData={
01162 sizeof(UConverterSharedData), ~((uint32_t) 0),
01163 NULL, NULL, &_UTF16LEStaticData, FALSE, &_UTF16LEImpl,
01164 0
01165 };
01166
01167
01168
01169 void T_UConverter_toUnicode_UTF32_BE(UConverterToUnicodeArgs * args,
01170 UErrorCode * err)
01171 {
01172 const unsigned char *mySource = (unsigned char *) args->source;
01173 UChar *myTarget = args->target;
01174 const unsigned char *sourceLimit = (unsigned char *) args->sourceLimit;
01175 const UChar *targetLimit = args->targetLimit;
01176 unsigned char *toUBytes = args->converter->toUBytes;
01177 uint32_t ch, i;
01178
01179
01180 if (args->converter->toUnicodeStatus && myTarget < targetLimit)
01181 {
01182 i = args->converter->toULength;
01183
01184 ch = args->converter->toUnicodeStatus - 1;
01185 args->converter->toUnicodeStatus = 0;
01186 goto morebytes;
01187 }
01188
01189 while (mySource < sourceLimit && myTarget < targetLimit)
01190 {
01191 i = 0;
01192 ch = 0;
01193 morebytes:
01194 while (i < sizeof(uint32_t))
01195 {
01196 if (mySource < sourceLimit)
01197 {
01198 ch = (ch << 8) | (uint8_t)(*mySource);
01199 toUBytes[i++] = (char) *(mySource++);
01200 }
01201 else
01202 {
01203 if (args->flush)
01204 {
01205 if (U_SUCCESS(*err))
01206 {
01207 *err = U_TRUNCATED_CHAR_FOUND;
01208 args->converter->toUnicodeStatus = MAXIMUM_UCS4;
01209 }
01210 }
01211 else
01212 {
01213
01214 args->converter->toUnicodeStatus = ch + 1;
01215 args->converter->toULength = (int8_t) i;
01216 }
01217 goto donefornow;
01218 }
01219 }
01220
01221 if (ch <= MAXIMUM_UTF)
01222 {
01223
01224 if (ch <= MAXIMUM_UCS2)
01225 {
01226
01227 *(myTarget++) = (UChar) ch;
01228 }
01229 else
01230 {
01231
01232 ch -= HALF_BASE;
01233 *(myTarget++) = (UChar) ((ch >> HALF_SHIFT) + SURROGATE_HIGH_START);
01234 ch = (ch & HALF_MASK) + SURROGATE_LOW_START;
01235 if (myTarget < targetLimit)
01236 {
01237 *(myTarget++) = (UChar)ch;
01238 }
01239 else
01240 {
01241
01242 args->converter->UCharErrorBuffer[0] = (UChar) ch;
01243 args->converter->UCharErrorBufferLength = 1;
01244 *err = U_BUFFER_OVERFLOW_ERROR;
01245 break;
01246 }
01247 }
01248 }
01249 else
01250 {
01251 args->source = (const char *) mySource;
01252 args->target = myTarget;
01253 args->converter->invalidCharLength = (int8_t)i;
01254 if (T_UConverter_toUnicode_InvalidChar_Callback(args, err))
01255 {
01256
01257 break;
01258 }
01259 args->converter->invalidCharLength = 0;
01260 mySource = (unsigned char *) args->source;
01261 myTarget = args->target;
01262 }
01263 }
01264
01265 donefornow:
01266 if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err))
01267 {
01268
01269 *err = U_BUFFER_OVERFLOW_ERROR;
01270 }
01271
01272 args->target = myTarget;
01273 args->source = (const char *) mySource;
01274 }
01275
01276 void T_UConverter_fromUnicode_UTF32_BE(UConverterFromUnicodeArgs * args,
01277 UErrorCode * err)
01278 {
01279 const UChar *mySource = args->source;
01280 unsigned char *myTarget = (unsigned char *) args->target;
01281 const UChar *sourceLimit = args->sourceLimit;
01282 const unsigned char *targetLimit = (unsigned char *) args->targetLimit;
01283 UChar32 ch, ch2;
01284 unsigned int indexToWrite;
01285 unsigned char temp[sizeof(uint32_t)];
01286
01287 temp[0] = 0;
01288
01289 if (args->converter->fromUnicodeStatus)
01290 {
01291 ch = args->converter->fromUnicodeStatus;
01292 args->converter->fromUnicodeStatus = 0;
01293 goto lowsurogate;
01294 }
01295
01296 while (mySource < sourceLimit && myTarget < targetLimit)
01297 {
01298 ch = *(mySource++);
01299
01300 if (SURROGATE_HIGH_START <= ch && ch < SURROGATE_LOW_START)
01301 {
01302 lowsurogate:
01303 if (mySource < sourceLimit)
01304 {
01305 ch2 = *mySource;
01306 if (SURROGATE_LOW_START <= ch2 && ch2 <= SURROGATE_LOW_END)
01307 {
01308 ch = ((ch - SURROGATE_HIGH_START) << HALF_SHIFT) + ch2 + SURROGATE_LOW_BASE;
01309 mySource++;
01310 }
01311 }
01312 else if (!args->flush)
01313 {
01314
01315 args->converter->fromUnicodeStatus = ch;
01316 break;
01317 }
01318 }
01319
01320
01321
01322 temp[1] = (uint8_t) (ch >> 16 & 0x1F);
01323 temp[2] = (uint8_t) (ch >> 8 & 0xFF);
01324 temp[3] = (uint8_t) (ch & 0xFF);
01325
01326 for (indexToWrite = 0; indexToWrite <= sizeof(uint32_t) - 1; indexToWrite++)
01327 {
01328 if (myTarget < targetLimit)
01329 {
01330 *(myTarget++) = temp[indexToWrite];
01331 }
01332 else
01333 {
01334 args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = temp[indexToWrite];
01335 *err = U_BUFFER_OVERFLOW_ERROR;
01336 }
01337 }
01338 }
01339
01340 if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err))
01341 {
01342 *err = U_BUFFER_OVERFLOW_ERROR;
01343 }
01344
01345 args->target = (char *) myTarget;
01346 args->source = mySource;
01347 }
01348
01349 UChar32 T_UConverter_getNextUChar_UTF32_BE(UConverterToUnicodeArgs* args,
01350 UErrorCode* err)
01351 {
01352 *err = U_UNSUPPORTED_ERROR;
01353 return 0;
01354 }
01355
01356 static const UConverterImpl _UTF32BEImpl = {
01357 UCNV_UTF32_BigEndian,
01358
01359 NULL,
01360 NULL,
01361
01362 NULL,
01363 NULL,
01364 NULL,
01365
01366 T_UConverter_toUnicode_UTF32_BE,
01367 NULL,
01368
01369 T_UConverter_fromUnicode_UTF32_BE,
01370 NULL,
01371
01372 T_UConverter_getNextUChar_UTF32_BE,
01373
01374 NULL,
01375 NULL
01376 };
01377
01379 const UConverterStaticData _UTF32BEStaticData = {
01380 sizeof(UConverterStaticData),
01381 "UTF32_BigEndian",
01382 1200, UCNV_IBM, UCNV_UTF32_BigEndian, 4, 4,
01383 { 0, 0, 0xff, 0xfd }, 4, FALSE, FALSE,
01384 {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}
01385 };
01386
01387 const UConverterSharedData _UTF32BEData = {
01388 sizeof(UConverterSharedData), ~((uint32_t) 0),
01389 NULL, NULL, &_UTF32BEStaticData, FALSE, &_UTF32BEImpl,
01390 0
01391 };
01392
01393
01394
01395 void T_UConverter_toUnicode_UTF32_LE(UConverterToUnicodeArgs * args,
01396 UErrorCode * err)
01397 {
01398 const unsigned char *mySource = (unsigned char *) args->source;
01399 UChar *myTarget = args->target;
01400 const unsigned char *sourceLimit = (unsigned char *) args->sourceLimit;
01401 const UChar *targetLimit = args->targetLimit;
01402 unsigned char *toUBytes = args->converter->toUBytes;
01403 uint32_t ch, i;
01404
01405
01406 if (args->converter->toUnicodeStatus && myTarget < targetLimit)
01407 {
01408 i = args->converter->toULength;
01409
01410
01411 ch = args->converter->toUnicodeStatus - 1;
01412 args->converter->toUnicodeStatus = 0;
01413 goto morebytes;
01414 }
01415
01416 while (mySource < sourceLimit && myTarget < targetLimit)
01417 {
01418 i = 0;
01419 ch = 0;
01420 morebytes:
01421 while (i < sizeof(uint32_t))
01422 {
01423 if (mySource < sourceLimit)
01424 {
01425 ch |= ((uint8_t)(*mySource)) << (i * 8);
01426 toUBytes[i++] = (char) *(mySource++);
01427 }
01428 else
01429 {
01430 if (args->flush)
01431 {
01432 if (U_SUCCESS(*err))
01433 {
01434 *err = U_TRUNCATED_CHAR_FOUND;
01435 args->converter->toUnicodeStatus = 0;
01436 }
01437 }
01438 else
01439 {
01440
01441 args->converter->toUnicodeStatus = ch + 1;
01442 args->converter->toULength = (int8_t) i;
01443 }
01444 goto donefornow;
01445 }
01446 }
01447
01448 if (ch <= MAXIMUM_UTF)
01449 {
01450
01451 if (ch <= MAXIMUM_UCS2)
01452 {
01453
01454 *(myTarget++) = (UChar) ch;
01455 }
01456 else
01457 {
01458
01459 ch -= HALF_BASE;
01460 *(myTarget++) = (UChar) ((ch >> HALF_SHIFT) + SURROGATE_HIGH_START);
01461 ch = (ch & HALF_MASK) + SURROGATE_LOW_START;
01462 if (myTarget < targetLimit)
01463 {
01464 *(myTarget++) = (UChar)ch;
01465 }
01466 else
01467 {
01468
01469 args->converter->UCharErrorBuffer[0] = (UChar) ch;
01470 args->converter->UCharErrorBufferLength = 1;
01471 *err = U_BUFFER_OVERFLOW_ERROR;
01472 break;
01473 }
01474 }
01475 }
01476 else
01477 {
01478 args->source = (const char *) mySource;
01479 args->target = myTarget;
01480 args->converter->invalidCharLength = (int8_t)i;
01481 if (T_UConverter_toUnicode_InvalidChar_Callback(args, err))
01482 {
01483
01484 break;
01485 }
01486 args->converter->invalidCharLength = 0;
01487 mySource = (unsigned char *) args->source;
01488 myTarget = args->target;
01489 }
01490 }
01491
01492 donefornow:
01493 if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err))
01494 {
01495
01496 *err = U_BUFFER_OVERFLOW_ERROR;
01497 }
01498
01499 args->target = myTarget;
01500 args->source = (const char *) mySource;
01501 }
01502
01503 void T_UConverter_fromUnicode_UTF32_LE(UConverterFromUnicodeArgs * args,
01504 UErrorCode * err)
01505 {
01506 const UChar *mySource = args->source;
01507 unsigned char *myTarget = (unsigned char *) args->target;
01508 const UChar *sourceLimit = args->sourceLimit;
01509 const unsigned char *targetLimit = (unsigned char *) args->targetLimit;
01510 UChar32 ch, ch2;
01511 unsigned int indexToWrite;
01512 unsigned char temp[sizeof(uint32_t)];
01513
01514 temp[3] = 0;
01515
01516 if (args->converter->fromUnicodeStatus)
01517 {
01518 ch = args->converter->fromUnicodeStatus;
01519 args->converter->fromUnicodeStatus = 0;
01520 goto lowsurogate;
01521 }
01522
01523 while (mySource < sourceLimit && myTarget < targetLimit)
01524 {
01525 ch = *(mySource++);
01526
01527 if (SURROGATE_HIGH_START <= ch && ch < SURROGATE_LOW_START)
01528 {
01529 lowsurogate:
01530 if (mySource < sourceLimit)
01531 {
01532 ch2 = *mySource;
01533 if (SURROGATE_LOW_START <= ch2 && ch2 <= SURROGATE_LOW_END)
01534 {
01535 ch = ((ch - SURROGATE_HIGH_START) << HALF_SHIFT) + ch2 + SURROGATE_LOW_BASE;
01536 mySource++;
01537 }
01538 }
01539 else if (!args->flush)
01540 {
01541
01542 args->converter->fromUnicodeStatus = ch;
01543 break;
01544 }
01545 }
01546
01547
01548
01549 temp[2] = (uint8_t) (ch >> 16 & 0x1F);
01550 temp[1] = (uint8_t) (ch >> 8 & 0xFF);
01551 temp[0] = (uint8_t) (ch & 0xFF);
01552
01553 for (indexToWrite = 0; indexToWrite <= sizeof(uint32_t) - 1; indexToWrite++)
01554 {
01555 if (myTarget < targetLimit)
01556 {
01557 *(myTarget++) = temp[indexToWrite];
01558 }
01559 else
01560 {
01561 args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = temp[indexToWrite];
01562 *err = U_BUFFER_OVERFLOW_ERROR;
01563 }
01564 }
01565 }
01566
01567 if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err))
01568 {
01569 *err = U_BUFFER_OVERFLOW_ERROR;
01570 }
01571
01572 args->target = (char *) myTarget;
01573 args->source = mySource;
01574 }
01575
01576 UChar32 T_UConverter_getNextUChar_UTF32_LE(UConverterToUnicodeArgs* args,
01577 UErrorCode* err)
01578 {
01579 *err = U_UNSUPPORTED_ERROR;
01580 return 0;
01581 }
01582
01583 static const UConverterImpl _UTF32LEImpl = {
01584 UCNV_UTF32_LittleEndian,
01585
01586 NULL,
01587 NULL,
01588
01589 NULL,
01590 NULL,
01591 NULL,
01592
01593 T_UConverter_toUnicode_UTF32_LE,
01594 NULL,
01595
01596 T_UConverter_fromUnicode_UTF32_LE,
01597 NULL,
01598
01599 T_UConverter_getNextUChar_UTF32_LE,
01600
01601 NULL,
01602 NULL
01603 };
01604
01606 const UConverterStaticData _UTF32LEStaticData = {
01607 sizeof(UConverterStaticData),
01608 "UTF32_LittleEndian",
01609 1200, UCNV_IBM, UCNV_UTF32_BigEndian, 4, 4,
01610 { 0xfd, 0xff, 0, 0 }, 4, FALSE, FALSE,
01611 {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}
01612 };
01613
01614
01615 const UConverterSharedData _UTF32LEData = {
01616 sizeof(UConverterSharedData), ~((uint32_t) 0),
01617 NULL, NULL, &_UTF32LEStaticData, FALSE, &_UTF32LEImpl,
01618 0
01619 };