00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018 #include "unicode/ustring.h"
00019 #include "unicode/utypes.h"
00020 #include "cstring.h"
00021 #include "umutex.h"
00022 #include "unicode/ucnv.h"
00023
00024
00025
00026 static UConverter *fgDefaultConverter = NULL;
00027
00028 static UConverter*
00029 getDefaultConverter(void);
00030
00031 static void
00032 releaseDefaultConverter(UConverter *converter);
00033
00034
00035
00036 #define MAX_STRLEN 0x0FFFFFFF
00037
00038 UChar*
00039 u_strcat(UChar *dst,
00040 const UChar *src)
00041 {
00042 UChar *anchor = dst;
00043
00044 while(*dst != 0) {
00045 ++dst;
00046 }
00047 while((*dst = *src) != 0) {
00048 ++dst;
00049 ++src;
00050 }
00051
00052 return anchor;
00053 }
00054
00055 UChar*
00056 u_strncat(UChar *dst,
00057 const UChar *src,
00058 int32_t n )
00059 {
00060 if(n > 0) {
00061 UChar *anchor = dst;
00062
00063 while(*dst != 0) {
00064 ++dst;
00065 }
00066 while((*dst = *src) != 0) {
00067 ++dst;
00068 if(--n == 0) {
00069 *dst = 0;
00070 break;
00071 }
00072 ++src;
00073 }
00074
00075 return anchor;
00076 } else {
00077 return dst;
00078 }
00079 }
00080
00081 UChar*
00082 u_strchr(const UChar *s, UChar c)
00083 {
00084 while (*s && *s != c) {
00085 ++s;
00086 }
00087 if (*s == c)
00088 return (UChar *)s;
00089 return NULL;
00090 }
00091
00092
00093
00094
00095 U_CAPI UChar * U_EXPORT2
00096 u_strstr(const UChar *s, const UChar *substring) {
00097
00098 UChar *strItr, *subItr;
00099
00100 if (*substring == 0) {
00101 return (UChar *)s;
00102 }
00103
00104 do {
00105 strItr = (UChar *)s;
00106 subItr = (UChar *)substring;
00107
00108
00109 while ((*strItr != 0) && (*strItr == *subItr)) {
00110 strItr++;
00111 subItr++;
00112 }
00113
00114 if (*subItr == 0) {
00115 return (UChar *)s;
00116 }
00117
00118 s++;
00119 } while (*strItr != 0);
00120
00121 return NULL;
00122 }
00123
00124 U_CAPI UChar * U_EXPORT2
00125 u_strchr32(const UChar *s, UChar32 c) {
00126 if(!UTF_NEED_MULTIPLE_UCHAR(c)) {
00127 return u_strchr(s, (UChar)c);
00128 } else {
00129 UChar buffer[UTF_MAX_CHAR_LENGTH + 1];
00130 UTextOffset i = 0;
00131 UTF_APPEND_CHAR_UNSAFE(buffer, i, c);
00132 buffer[i] = 0;
00133 return u_strstr(s, buffer);
00134 }
00135 }
00136
00137 int32_t
00138 u_strcmp(const UChar *s1,
00139 const UChar *s2)
00140 {
00141 int32_t rc;
00142 for(;;) {
00143 rc = (int32_t)*s1 - (int32_t)*s2;
00144 if(rc != 0 || *s1 == 0) {
00145 return rc;
00146 }
00147 ++s1;
00148 ++s2;
00149 }
00150 }
00151
00152 int32_t
00153 u_strncmp(const UChar *s1,
00154 const UChar *s2,
00155 int32_t n)
00156 {
00157 if(n > 0) {
00158 int32_t rc;
00159 for(;;) {
00160 rc = (int32_t)*s1 - (int32_t)*s2;
00161 if(rc != 0 || *s1 == 0 || --n == 0) {
00162 return rc;
00163 }
00164 ++s1;
00165 ++s2;
00166 }
00167 } else {
00168 return 0;
00169 }
00170 }
00171
00172 UChar*
00173 u_strcpy(UChar *dst,
00174 const UChar *src)
00175 {
00176 UChar *anchor = dst;
00177
00178 while((*dst = *src) != 0) {
00179 ++dst;
00180 ++src;
00181 }
00182
00183 return anchor;
00184 }
00185
00186 UChar*
00187 u_strncpy(UChar *dst,
00188 const UChar *src,
00189 int32_t n)
00190 {
00191 UChar *anchor = dst;
00192
00193 if(n > 0) {
00194 while((*dst = *src) != 0) {
00195 ++dst;
00196 if(--n == 0) {
00197 *dst = 0;
00198 break;
00199 }
00200 ++src;
00201 }
00202 } else {
00203 *dst = 0;
00204 }
00205
00206 return anchor;
00207 }
00208
00209 int32_t
00210 u_strlen(const UChar *s)
00211 {
00212 # if U_SIZEOF_WCHAR_T == U_SIZEOF_UCHAR
00213 return uprv_wcslen(s);
00214 # else
00215 const UChar *t = s;
00216 while(*t != 0) {
00217 ++t;
00218 }
00219 return t - s;
00220 #endif
00221 }
00222
00223
00224
00225 UChar* u_uastrcpy(UChar *ucs1,
00226 const char *s2 )
00227 {
00228 UConverter *cnv = getDefaultConverter();
00229 if(cnv != NULL) {
00230 UErrorCode err = U_ZERO_ERROR;
00231 ucnv_toUChars(cnv,
00232 ucs1,
00233 MAX_STRLEN,
00234 s2,
00235 uprv_strlen(s2),
00236 &err);
00237 releaseDefaultConverter(cnv);
00238 if(U_FAILURE(err)) {
00239 *ucs1 = 0;
00240 }
00241 } else {
00242 *ucs1 = 0;
00243 }
00244 return ucs1;
00245 }
00246
00247
00248
00249
/**
 * Returns the length of the char string ucs1, looking at no more than n
 * bytes.
 *
 * The remaining budget is tested before each byte is read, so ucs1[n] is
 * never accessed; a buffer holding exactly n bytes without a terminator is
 * therefore safe to pass.
 *
 * @param ucs1 String to measure; NULL yields 0.
 * @param n    Maximum number of bytes to examine. 0 yields 0. A negative
 *             value places no practical bound on the scan, which then runs
 *             to the terminator.
 * @return Number of bytes before the terminator, capped at n.
 */
static int32_t u_astrnlen(const char *ucs1, int32_t n)
{
    int32_t len = 0;

    if (ucs1 == NULL) {
        return 0;
    }

    /* Check the budget first so that no byte beyond the bound is read. */
    while (n != 0 && *ucs1 != 0) {
        ++ucs1;
        --n;
        ++len;
    }
    return len;
}
00263
00264 UChar* u_uastrncpy(UChar *ucs1,
00265 const char *s2 ,
00266 int32_t n)
00267 {
00268 UChar *target = ucs1;
00269 UConverter *cnv = getDefaultConverter();
00270 if(cnv != NULL) {
00271 UErrorCode err = U_ZERO_ERROR;
00272 ucnv_reset(cnv);
00273 ucnv_toUnicode(cnv,
00274 &target,
00275 ucs1+n,
00276 &s2,
00277 s2+u_astrnlen(s2, n),
00278 NULL,
00279 TRUE,
00280 &err);
00281 ucnv_reset(cnv);
00282 releaseDefaultConverter(cnv);
00283 if(U_FAILURE(err) && (err != U_BUFFER_OVERFLOW_ERROR) ) {
00284 *ucs1 = 0;
00285 }
00286 if(target < (ucs1+n)) {
00287 *target = 0;
00288 }
00289 } else {
00290 *ucs1 = 0;
00291 }
00292 return ucs1;
00293 }
00294
00295 char* u_austrcpy(char *s1,
00296 const UChar *ucs2 )
00297 {
00298 UConverter *cnv = getDefaultConverter();
00299 if(cnv != NULL) {
00300 UErrorCode err = U_ZERO_ERROR;
00301 int32_t len = ucnv_fromUChars(cnv,
00302 s1,
00303 MAX_STRLEN,
00304 ucs2,
00305 -1,
00306 &err);
00307 releaseDefaultConverter(cnv);
00308 s1[len] = 0;
00309 } else {
00310 *s1 = 0;
00311 }
00312 return s1;
00313 }
00314
00315
00316
00317
00318
00319 static UConverter*
00320 getDefaultConverter()
00321 {
00322 UConverter *converter = NULL;
00323
00324 if(fgDefaultConverter != NULL) {
00325 umtx_lock(NULL);
00326
00327
00328 if(fgDefaultConverter != NULL) {
00329 converter = fgDefaultConverter;
00330 fgDefaultConverter = NULL;
00331 }
00332 umtx_unlock(NULL);
00333 }
00334
00335
00336 if(converter == NULL) {
00337 UErrorCode status = U_ZERO_ERROR;
00338 converter = ucnv_open(NULL, &status);
00339 if(U_FAILURE(status)) {
00340 return NULL;
00341 }
00342 }
00343
00344 return converter;
00345 }
00346
00347 static void
00348 releaseDefaultConverter(UConverter *converter)
00349 {
00350 if(fgDefaultConverter == NULL) {
00351 umtx_lock(NULL);
00352
00353 if(fgDefaultConverter == NULL) {
00354 fgDefaultConverter = converter;
00355 converter = NULL;
00356 }
00357 umtx_unlock(NULL);
00358 }
00359
00360 if(converter != NULL) {
00361 ucnv_close(converter);
00362 }
00363 }
00364
00365
00366
00367
00368 static const UChar UNESCAPE_MAP[] = {
00369
00370
00371
00372
00373 0x61, 0x07,
00374 0x62, 0x08,
00375 0x66, 0x0c,
00376 0x6E, 0x0a,
00377 0x72, 0x0d,
00378 0x74, 0x09,
00379 0x76, 0x0b
00380 };
00381 enum { UNESCAPE_MAP_LENGTH = sizeof(UNESCAPE_MAP) / sizeof(UNESCAPE_MAP[0]) };
00382
00383
00384 static int8_t _digit8(UChar c) {
00385 if (c >= 0x0030 && c <= 0x0037) {
00386 return (int8_t)(c - 0x0030);
00387 }
00388 return -1;
00389 }
00390
00391
00392 static int8_t _digit16(UChar c) {
00393 if (c >= 0x0030 && c <= 0x0039) {
00394 return (int8_t)(c - 0x0030);
00395 }
00396 if (c >= 0x0041 && c <= 0x0046) {
00397 return (int8_t)(c - (0x0041 - 10));
00398 }
00399 if (c >= 0x0061 && c <= 0x0066) {
00400 return (int8_t)(c - (0x0061 - 10));
00401 }
00402 return -1;
00403 }
00404
00405
00406
00407
00408 U_CAPI UChar32 U_EXPORT2
00409 u_unescapeAt(UNESCAPE_CHAR_AT charAt,
00410 int32_t *offset,
00411 int32_t length,
00412 void *context) {
00413
00414 int32_t start = *offset;
00415 UChar c;
00416 UChar32 result = 0;
00417 int8_t n = 0;
00418 int8_t minDig = 0;
00419 int8_t maxDig = 0;
00420 int8_t bitsPerDigit = 4;
00421 int8_t dig;
00422 int32_t i;
00423
00424
00425 if (*offset < 0 || *offset >= length) {
00426 goto err;
00427 }
00428
00429
00430 c = charAt((*offset)++, context);
00431
00432
00433 switch (c) {
00434 case 0x0075 :
00435 minDig = maxDig = 4;
00436 break;
00437 case 0x0055 :
00438 minDig = maxDig = 8;
00439 break;
00440 case 0x0078 :
00441 minDig = 1;
00442 maxDig = 2;
00443 break;
00444 default:
00445 dig = _digit8(c);
00446 if (dig >= 0) {
00447 minDig = 1;
00448 maxDig = 3;
00449 n = 1;
00450 bitsPerDigit = 3;
00451 result = dig;
00452 }
00453 break;
00454 }
00455 if (minDig != 0) {
00456 while (*offset < length && n < maxDig) {
00457 c = charAt(*offset, context);
00458 dig = (int8_t)((bitsPerDigit == 3) ? _digit8(c) : _digit16(c));
00459 if (dig < 0) {
00460 break;
00461 }
00462 result = (result << bitsPerDigit) | dig;
00463 ++(*offset);
00464 ++n;
00465 }
00466 if (n < minDig) {
00467 goto err;
00468 }
00469 return result;
00470 }
00471
00472
00473 for (i=0; i<UNESCAPE_MAP_LENGTH; i+=2) {
00474 if (c == UNESCAPE_MAP[i]) {
00475 return UNESCAPE_MAP[i+1];
00476 } else if (c < UNESCAPE_MAP[i]) {
00477 break;
00478 }
00479 }
00480
00481
00482
00483
00484 if (UTF_IS_FIRST_SURROGATE(c) && *offset < length) {
00485 UChar c2 = charAt(*offset, context);
00486 if (UTF_IS_SECOND_SURROGATE(c2)) {
00487 ++(*offset);
00488 return UTF16_GET_PAIR_VALUE(c, c2);
00489 }
00490 }
00491 return c;
00492
00493 err:
00494
00495 *offset = start;
00496 return (UChar32)0xFFFFFFFF;
00497 }
00498
00499
00500 static UChar _charPtr_charAt(int32_t offset, void *context) {
00501 UChar c16;
00502
00503
00504 u_charsToUChars(((char*) context) + offset, &c16, 1);
00505 return c16;
00506 }
00507
00508
00509 static void _appendUChars(UChar *dest, int32_t destCapacity,
00510 const char *src, int32_t srcLen) {
00511 if (destCapacity < 0) {
00512 destCapacity = 0;
00513 }
00514 if (srcLen > destCapacity) {
00515 srcLen = destCapacity;
00516 }
00517 u_charsToUChars(src, dest, srcLen);
00518 }
00519
00520
00521 U_CAPI int32_t U_EXPORT2
00522 u_unescape(const char *src, UChar *dest, int32_t destCapacity) {
00523 const char *segment = src;
00524 int32_t i = 0;
00525 char c;
00526
00527 while ((c=*src) != 0) {
00528
00529
00530
00531 if (c == '\\') {
00532 int32_t lenParsed = 0;
00533 UChar32 c32;
00534 if (src != segment) {
00535 if (dest != NULL) {
00536 _appendUChars(dest + i, destCapacity - i,
00537 segment, src - segment);
00538 }
00539 i += src - segment;
00540 }
00541 ++src;
00542 c32 = u_unescapeAt(_charPtr_charAt, &lenParsed, uprv_strlen(src), (void*)src);
00543 if (lenParsed == 0) {
00544 goto err;
00545 }
00546 src += lenParsed;
00547 if (dest != NULL && UTF_CHAR_LENGTH(c32) <= (destCapacity - i)) {
00548 UTF_APPEND_CHAR_UNSAFE(dest, i, c32);
00549 } else {
00550 i += UTF_CHAR_LENGTH(c32);
00551 }
00552 segment = src;
00553 } else {
00554 ++src;
00555 }
00556 }
00557 if (src != segment) {
00558 if (dest != NULL) {
00559 _appendUChars(dest + i, destCapacity - i,
00560 segment, src - segment);
00561 }
00562 i += src - segment;
00563 }
00564 if (dest != NULL && i < destCapacity) {
00565 dest[i] = 0;
00566 }
00567 return i + 1;
00568
00569 err:
00570 if (dest != NULL && destCapacity > 0) {
00571 *dest = 0;
00572 }
00573 return 0;
00574 }