00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025 #include "ucmp16.h"
00026 #include "cmemory.h"
00027
00028
00029
00030
00031
00032 #define arrayRegionMatches(source, sourceStart, target, targetStart, len) (uprv_memcmp(&source[sourceStart], &target[targetStart], len * sizeof(int16_t)) != 0)
00033
00034 static const int32_t UCMP16_kMaxUnicode = UCMP16_kMaxUnicode_int;
00035 static const int32_t UCMP16_kUnicodeCount = UCMP16_kUnicodeCount_int;
00036 static const int32_t UCMP16_kBlockShift = UCMP16_kBlockShift_int;
00037 static const int32_t UCMP16_kBlockCount = UCMP16_kBlockCount_int;
00038 static const int32_t UCMP16_kBlockBytes = UCMP16_kBlockBytes_int;
00039 static const int32_t UCMP16_kIndexShift = UCMP16_kIndexShift_int;
00040 static const int32_t UCMP16_kIndexCount = UCMP16_kIndexCount_int;
00041 static const uint32_t UCMP16_kBlockMask = UCMP16_kBlockMask_int;
00042
00046 static CompactShortArray* setToBogus(CompactShortArray* array);
00047 static void touchBlock(CompactShortArray* this_obj,
00048 int32_t i,
00049 int16_t value);
00050 static UBool blockTouched(const CompactShortArray* this_obj,
00051 int32_t i);
00052
00053
00054
00055
00056
00057 int32_t ucmp16_getkUnicodeCount()
00058 {return UCMP16_kUnicodeCount;}
00059
00060 int32_t ucmp16_getkBlockCount()
00061 {return UCMP16_kBlockCount;}
00062
00063 CompactShortArray* ucmp16_open(int16_t defaultValue)
00064 {
00065 int32_t i;
00066 CompactShortArray* this_obj = (CompactShortArray*) uprv_malloc(sizeof(CompactShortArray));
00067 if (this_obj == NULL) return NULL;
00068
00069 this_obj->fStructSize = sizeof(CompactShortArray);
00070 this_obj->fCount = UCMP16_kUnicodeCount;
00071 this_obj->fCompact = FALSE;
00072 this_obj->fBogus = FALSE;
00073 this_obj->fArray = NULL;
00074 this_obj->fAlias = FALSE;
00075 this_obj->fIndex = NULL;
00076 this_obj->fHashes = NULL;
00077 this_obj->fIAmOwned = FALSE;
00078 this_obj->fDefaultValue = defaultValue;
00079
00080 this_obj->fArray = (int16_t*)uprv_malloc(UCMP16_kUnicodeCount * sizeof(int16_t));
00081 if (this_obj->fArray == NULL)
00082 {
00083 this_obj->fBogus = TRUE;
00084 return NULL;
00085 }
00086
00087 this_obj->fIndex = (uint16_t*)uprv_malloc(UCMP16_kIndexCount * sizeof(uint16_t));
00088 if (this_obj->fIndex == NULL)
00089 {
00090 uprv_free(this_obj->fArray);
00091 this_obj->fArray = NULL;
00092
00093 this_obj->fBogus = TRUE;
00094 return NULL;
00095 }
00096
00097 this_obj->kBlockShift = UCMP16_kBlockShift;
00098 this_obj->kBlockMask = UCMP16_kBlockMask;
00099 for (i = 0; i < UCMP16_kUnicodeCount; i += 1)
00100 {
00101 this_obj->fArray[i] = defaultValue;
00102 }
00103
00104 this_obj->fHashes =(int32_t*)uprv_malloc(UCMP16_kIndexCount * sizeof(int32_t));
00105 if (this_obj->fHashes == NULL)
00106 {
00107 uprv_free(this_obj->fArray);
00108 uprv_free(this_obj->fIndex);
00109 this_obj->fBogus = TRUE;
00110 return NULL;
00111 }
00112
00113 for (i = 0; i < UCMP16_kIndexCount; i += 1)
00114 {
00115 this_obj->fIndex[i] = (uint16_t)(i << UCMP16_kBlockShift);
00116 this_obj->fHashes[i] = 0;
00117 }
00118
00119 return this_obj;
00120 }
00121
00122 void ucmp16_initBogus(CompactShortArray *this_obj)
00123 {
00124 if (this_obj == NULL) return;
00125 this_obj->fStructSize = sizeof(CompactShortArray);
00126 this_obj->fCount = UCMP16_kUnicodeCount;
00127 this_obj->fCompact = FALSE;
00128 this_obj->fBogus = TRUE;
00129 this_obj->fArray = NULL;
00130 this_obj->fAlias = FALSE;
00131 this_obj->fIndex = NULL;
00132 this_obj->fHashes = NULL;
00133 this_obj->fIAmOwned = TRUE;
00134 this_obj->fDefaultValue = 0;
00135 }
00136
00137 void ucmp16_init(CompactShortArray *this_obj, int16_t defaultValue)
00138 {
00139 int32_t i;
00140
00141 this_obj->fStructSize = sizeof(CompactShortArray);
00142 this_obj->fCount = UCMP16_kUnicodeCount;
00143 this_obj->fCompact = FALSE;
00144 this_obj->fBogus = FALSE;
00145 this_obj->fArray = NULL;
00146 this_obj->fAlias = FALSE;
00147 this_obj->fIndex = NULL;
00148 this_obj->fHashes = NULL;
00149 this_obj->fIAmOwned = TRUE;
00150 this_obj->fDefaultValue = defaultValue;
00151
00152 this_obj->fArray = (int16_t*)uprv_malloc(UCMP16_kUnicodeCount * sizeof(int16_t));
00153 if (this_obj->fArray == NULL)
00154 {
00155 this_obj->fBogus = TRUE;
00156 return;
00157 }
00158
00159 this_obj->fIndex = (uint16_t*)uprv_malloc(UCMP16_kIndexCount * sizeof(uint16_t));
00160 if (this_obj->fIndex == NULL)
00161 {
00162 uprv_free(this_obj->fArray);
00163 this_obj->fArray = NULL;
00164
00165 this_obj->fBogus = TRUE;
00166 return;
00167 }
00168
00169 this_obj->kBlockShift = UCMP16_kBlockShift;
00170 this_obj->kBlockMask = UCMP16_kBlockMask;
00171 for (i = 0; i < UCMP16_kUnicodeCount; i += 1)
00172 {
00173 this_obj->fArray[i] = defaultValue;
00174 }
00175
00176 this_obj->fHashes =(int32_t*)uprv_malloc(UCMP16_kIndexCount * sizeof(int32_t));
00177 if (this_obj->fHashes == NULL)
00178 {
00179 uprv_free(this_obj->fArray);
00180 uprv_free(this_obj->fIndex);
00181 this_obj->fBogus = TRUE;
00182 return;
00183 }
00184
00185 for (i = 0; i < UCMP16_kIndexCount; i += 1)
00186 {
00187 this_obj->fIndex[i] = (uint16_t)(i << UCMP16_kBlockShift);
00188 this_obj->fHashes[i] = 0;
00189 }
00190 }
00191
00192 CompactShortArray* ucmp16_openAdopt(uint16_t *indexArray,
00193 int16_t *newValues,
00194 int32_t count,
00195 int16_t defaultValue)
00196 {
00197 CompactShortArray* this_obj = (CompactShortArray*) uprv_malloc(sizeof(CompactShortArray));
00198
00199 ucmp16_initAdopt(this_obj, indexArray, newValues, count, defaultValue);
00200 this_obj->fIAmOwned = FALSE;
00201 return this_obj;
00202 }
00203
00204 CompactShortArray* ucmp16_openAdoptWithBlockShift(uint16_t *indexArray,
00205 int16_t *newValues,
00206 int32_t count,
00207 int16_t defaultValue,
00208 int32_t blockShift)
00209 {
00210 CompactShortArray* this_obj = ucmp16_openAdopt(indexArray,
00211 newValues,
00212 count,
00213 defaultValue);
00214 if (this_obj) {
00215 this_obj->kBlockShift = blockShift;
00216 this_obj->kBlockMask = (uint32_t) (((uint32_t)1 << (uint32_t)blockShift) - (uint32_t)1);
00217 }
00218
00219 return this_obj;
00220 }
00221
00222 CompactShortArray* ucmp16_openAlias(uint16_t *indexArray,
00223 int16_t *newValues,
00224 int32_t count,
00225 int16_t defaultValue)
00226 {
00227 CompactShortArray* this_obj = (CompactShortArray*) uprv_malloc(sizeof(CompactShortArray));
00228
00229 ucmp16_initAlias(this_obj, indexArray, newValues, count, defaultValue);
00230 this_obj->fIAmOwned = FALSE;
00231 return this_obj;
00232 }
00233
00234
00235
00236 CompactShortArray* ucmp16_initAdopt(CompactShortArray *this_obj,
00237 uint16_t *indexArray,
00238 int16_t *newValues,
00239 int32_t count,
00240 int16_t defaultValue)
00241 {
00242 if (this_obj) {
00243 this_obj->fHashes = NULL;
00244 this_obj->fCount = count;
00245 this_obj->fDefaultValue = defaultValue;
00246 this_obj->fBogus = FALSE;
00247 this_obj->fArray = newValues;
00248 this_obj->fIndex = indexArray;
00249 this_obj->fCompact = (UBool)(count < UCMP16_kUnicodeCount);
00250 this_obj->fStructSize = sizeof(CompactShortArray);
00251 this_obj->kBlockShift = UCMP16_kBlockShift;
00252 this_obj->kBlockMask = UCMP16_kBlockMask;
00253 this_obj->fAlias = FALSE;
00254 this_obj->fIAmOwned = TRUE;
00255 }
00256
00257 return this_obj;
00258 }
00259
00260 CompactShortArray* ucmp16_initAdoptWithBlockShift(CompactShortArray *this_obj,
00261 uint16_t *indexArray,
00262 int16_t *newValues,
00263 int32_t count,
00264 int16_t defaultValue,
00265 int32_t blockShift)
00266 {
00267 ucmp16_initAdopt(this_obj, indexArray, newValues, count, defaultValue);
00268
00269 if (this_obj) {
00270 this_obj->kBlockShift = blockShift;
00271 this_obj->kBlockMask = (uint32_t) (((uint32_t)1 << (uint32_t)blockShift) - (uint32_t)1);
00272 }
00273
00274 return this_obj;
00275 }
00276
00277
00278 CompactShortArray* ucmp16_initAlias(CompactShortArray *this_obj,
00279 uint16_t *indexArray,
00280 int16_t *newValues,
00281 int32_t count,
00282 int16_t defaultValue)
00283 {
00284 if (this_obj) {
00285 this_obj->fHashes = NULL;
00286 this_obj->fCount = count;
00287 this_obj->fDefaultValue = defaultValue;
00288 this_obj->fBogus = FALSE;
00289 this_obj->fArray = newValues;
00290 this_obj->fIndex = indexArray;
00291 this_obj->fCompact = (UBool)(count < UCMP16_kUnicodeCount);
00292 this_obj->fStructSize = sizeof(CompactShortArray);
00293 this_obj->kBlockShift = UCMP16_kBlockShift;
00294 this_obj->kBlockMask = UCMP16_kBlockMask;
00295 this_obj->fAlias = TRUE;
00296 this_obj->fIAmOwned = TRUE;
00297 }
00298
00299 return this_obj;
00300 }
00301
00302 CompactShortArray* ucmp16_initAliasWithBlockShift(CompactShortArray *this_obj,
00303 uint16_t *indexArray,
00304 int16_t *newValues,
00305 int32_t count,
00306 int16_t defaultValue,
00307 int32_t blockShift)
00308 {
00309 ucmp16_initAlias(this_obj, indexArray, newValues, count, defaultValue);
00310
00311 if (this_obj) {
00312 this_obj->kBlockShift = blockShift;
00313 this_obj->kBlockMask = (uint32_t) (((uint32_t)1 << (uint32_t)blockShift) - (uint32_t)1);
00314 }
00315
00316 return this_obj;
00317 }
00318
00319
00320
00321
00322 void ucmp16_close(CompactShortArray* this_obj)
00323 {
00324 if(this_obj != NULL) {
00325 if(!this_obj->fAlias) {
00326 if(this_obj->fArray != NULL) {
00327 uprv_free(this_obj->fArray);
00328 }
00329 if(this_obj->fIndex != NULL) {
00330 uprv_free(this_obj->fIndex);
00331 }
00332 }
00333 if(this_obj->fHashes != NULL) {
00334 uprv_free(this_obj->fHashes);
00335 }
00336 if(!this_obj->fIAmOwned)
00337 {
00338 uprv_free(this_obj);
00339 }
00340 }
00341 }
00342
00343 static CompactShortArray* setToBogus(CompactShortArray* this_obj)
00344 {
00345 if(this_obj != NULL) {
00346 if(!this_obj->fAlias) {
00347 uprv_free(this_obj->fArray);
00348 this_obj->fArray = NULL;
00349
00350 uprv_free(this_obj->fIndex);
00351 this_obj->fIndex = NULL;
00352 }
00353 uprv_free(this_obj->fHashes);
00354 this_obj->fHashes = NULL;
00355
00356 this_obj->fCount = 0;
00357 this_obj->fCompact = FALSE;
00358 this_obj->fBogus = TRUE;
00359 }
00360
00361 return this_obj;
00362 }
00363
00364 UBool ucmp16_isBogus(const CompactShortArray* this_obj)
00365 {
00366 return (UBool)(this_obj == NULL || this_obj->fBogus);
00367 }
00368
00369 void ucmp16_expand(CompactShortArray* this_obj)
00370 {
00371 if (this_obj->fCompact)
00372 {
00373 int32_t i;
00374 int16_t *tempArray = (int16_t*)uprv_malloc(UCMP16_kUnicodeCount * sizeof(int16_t));
00375
00376
00377 if (tempArray == NULL)
00378 {
00379 this_obj->fBogus = TRUE;
00380 return;
00381 }
00382
00383 this_obj->fHashes =(int32_t*)uprv_malloc(UCMP16_kIndexCount * sizeof(int32_t));
00384 if (this_obj->fHashes == NULL)
00385 {
00386 this_obj->fBogus = TRUE;
00387 return;
00388 }
00389
00390 for (i = 0; i < UCMP16_kUnicodeCount; i += 1)
00391 {
00392 tempArray[i] = ucmp16_get(this_obj, (UChar)i);
00393 }
00394
00395 for (i = 0; i < (1 << (16 - this_obj->kBlockShift)); i += 1)
00396 {
00397 this_obj->fIndex[i] = (uint16_t)(i<<this_obj->kBlockShift);
00398 }
00399
00400 uprv_free(this_obj->fArray);
00401 this_obj->fArray = tempArray;
00402 this_obj->fCompact = FALSE;
00403 }
00404 }
00405
00406 void ucmp16_set(CompactShortArray* this_obj,
00407 UChar c,
00408 int16_t value)
00409 {
00410 if (this_obj->fCompact)
00411 {
00412 ucmp16_expand(this_obj);
00413 if (this_obj->fBogus) return;
00414 }
00415
00416 this_obj->fArray[(int32_t)c] = value;
00417
00418 if (value != this_obj->fDefaultValue)
00419 {
00420 touchBlock(this_obj, c >> this_obj->kBlockShift, value);
00421 }
00422 }
00423
00424
00425 void ucmp16_setRange(CompactShortArray* this_obj,
00426 UChar start,
00427 UChar end,
00428 int16_t value)
00429 {
00430 int32_t i;
00431 if (this_obj->fCompact)
00432 {
00433 ucmp16_expand(this_obj);
00434 if (this_obj->fBogus) return;
00435 }
00436 if (value != this_obj->fDefaultValue)
00437 {
00438 for (i = start; i <= end; i += 1)
00439 {
00440 this_obj->fArray[i] = value;
00441 touchBlock(this_obj, i >> this_obj->kBlockShift, value);
00442 }
00443 }
00444 else
00445 {
00446 for (i = start; i <= end; i += 1) this_obj->fArray[i] = value;
00447 }
00448 }
00449
00450
00451
00452 void ucmp16_compact(CompactShortArray* this_obj)
00453 {
00454 if (!this_obj->fCompact)
00455 {
00456 int32_t limitCompacted = 0;
00457 int32_t i, iBlockStart;
00458 int16_t iUntouched = -1;
00459
00460 for (i = 0, iBlockStart = 0; i < (1 << (16 - this_obj->kBlockShift)); i += 1, iBlockStart += (1 << this_obj->kBlockShift))
00461 {
00462 UBool touched = blockTouched(this_obj, i);
00463
00464 this_obj->fIndex[i] = 0xFFFF;
00465
00466 if (!touched && iUntouched != -1)
00467 {
00468
00469
00470
00471
00472 this_obj->fIndex[i] = iUntouched;
00473 }
00474 else
00475 {
00476 int32_t j, jBlockStart;
00477
00478 for (j = 0, jBlockStart = 0;
00479 j < limitCompacted;
00480 j += 1, jBlockStart += (1 << this_obj->kBlockShift))
00481 {
00482 if (this_obj->fHashes[i] == this_obj->fHashes[j] &&
00483 arrayRegionMatches(this_obj->fArray,
00484 iBlockStart,
00485 this_obj->fArray,
00486 jBlockStart,
00487 (1 << this_obj->kBlockShift)))
00488 {
00489 this_obj->fIndex[i] = (int16_t)jBlockStart;
00490 }
00491 }
00492
00493
00494 if (this_obj->fIndex[i] == 0xFFFF)
00495 {
00496
00497 uprv_memcpy(&(this_obj->fArray[jBlockStart]),
00498 &(this_obj->fArray[iBlockStart]),
00499 (1 << this_obj->kBlockShift)*sizeof(int16_t));
00500
00501 this_obj->fIndex[i] = (int16_t)jBlockStart;
00502 this_obj->fHashes[j] = this_obj->fHashes[i];
00503 limitCompacted += 1;
00504
00505 if (!touched)
00506 {
00507
00508
00509 iUntouched = (int16_t)jBlockStart;
00510 }
00511 }
00512 }
00513 }
00514
00515
00516 {
00517 int32_t newSize = limitCompacted * (1 << this_obj->kBlockShift);
00518 int16_t *result = (int16_t*) uprv_malloc(sizeof(int16_t) * newSize);
00519
00520 uprv_memcpy(result, this_obj->fArray, newSize * sizeof(int16_t));
00521
00522 uprv_free(this_obj->fArray);
00523 this_obj->fArray = result;
00524 this_obj->fCount = newSize;
00525 uprv_free(this_obj->fHashes);
00526 this_obj->fHashes = NULL;
00527
00528 this_obj->fCompact = TRUE;
00529 }
00530 }
00531 }
00532
00538 int16_t ucmp16_getDefaultValue(const CompactShortArray* this_obj)
00539 {
00540 return this_obj->fDefaultValue;
00541 }
00542
00543
00544 static void touchBlock(CompactShortArray* this_obj,
00545 int32_t i,
00546 int16_t value)
00547 {
00548 this_obj->fHashes[i] = (this_obj->fHashes[i] + (value << 1)) | 1;
00549 }
00550
00551 static UBool blockTouched(const CompactShortArray* this_obj, int32_t i)
00552 {
00553 return (UBool)(this_obj->fHashes[i] != 0);
00554 }
00555
00556 uint32_t ucmp16_getCount(const CompactShortArray* this_obj)
00557 {
00558 return this_obj->fCount;
00559 }
00560
00561 const int16_t* ucmp16_getArray(const CompactShortArray* this_obj)
00562 {
00563 return this_obj->fArray;
00564 }
00565
00566 const uint16_t* ucmp16_getIndex(const CompactShortArray* this_obj)
00567 {
00568 return this_obj->fIndex;
00569 }
00570
00571 U_CAPI uint32_t U_EXPORT2 ucmp16_flattenMem (const CompactShortArray* array, UMemoryStream *MS)
00572 {
00573 int32_t size = 0;
00574
00575 uprv_mstrm_write32(MS, ICU_UCMP16_VERSION);
00576 size += 4;
00577
00578 uprv_mstrm_write32(MS, array->fCount);
00579 size += 4;
00580
00581 uprv_mstrm_write32(MS, array->kBlockShift);
00582 size += 4;
00583
00584 uprv_mstrm_write32(MS, array->kBlockMask);
00585 size += 4;
00586
00587 uprv_mstrm_writeBlock(MS, array->fIndex, sizeof(array->fIndex[0])*UCMP16_kIndexCount);
00588 size += sizeof(array->fIndex[0])*UCMP16_kIndexCount;
00589
00590 uprv_mstrm_writeBlock(MS, array->fArray, sizeof(array->fArray[0])*array->fCount);
00591 size += sizeof(array->fArray[0])*array->fCount;
00592
00593 while(size%4)
00594 {
00595 uprv_mstrm_writePadding(MS, 1);
00596 size += 1;
00597 }
00598
00599 return size;
00600 }
00601
00602
00603
00604
00605
00606
00607 U_CAPI void U_EXPORT2 ucmp16_initFromData(CompactShortArray *this_obj, const uint8_t **source, UErrorCode *status)
00608 {
00609 uint32_t i;
00610 const uint8_t *oldSource = *source;
00611
00612 if(U_FAILURE(*status))
00613 return;
00614
00615 this_obj->fArray = NULL;
00616 this_obj->fIndex = NULL;
00617 this_obj->fBogus = FALSE;
00618 this_obj->fStructSize = sizeof(CompactShortArray);
00619 this_obj->fCompact = TRUE;
00620 this_obj->fAlias = TRUE;
00621 this_obj->fIAmOwned = TRUE;
00622 this_obj->fHashes = NULL;
00623 this_obj->fDefaultValue = 0x0000;
00624
00625 i = * ((const uint32_t*) *source);
00626 (*source) += 4;
00627
00628 if(i != ICU_UCMP16_VERSION)
00629 {
00630 *status = U_INVALID_FORMAT_ERROR;
00631 return;
00632 }
00633
00634 this_obj->fCount = * ((const uint32_t*)*source);
00635 (*source) += 4;
00636
00637 this_obj->kBlockShift = * ((const uint32_t*)*source);
00638 (*source) += 4;
00639
00640 this_obj->kBlockMask = * ((const uint32_t*)*source);
00641 (*source) += 4;
00642
00643 this_obj->fIndex = (uint16_t*) *source;
00644 (*source) += sizeof(this_obj->fIndex[0])*UCMP16_kIndexCount;
00645
00646 this_obj->fArray = (int16_t*) *source;
00647 (*source) += sizeof(this_obj->fArray[0])*this_obj->fCount;
00648
00649
00650 while((*source-(oldSource))%4)
00651 (*source)++;
00652 }
00653
00654