Main Page   Class Hierarchy   Alphabetical List   Compound List   File List   Compound Members  

unames.c

00001 /*
00002 *******************************************************************************
00003 *
00004 *   Copyright (C) 1999, International Business Machines
00005 *   Corporation and others.  All Rights Reserved.
00006 *
00007 *******************************************************************************
00008 *   file name:  unames.c
00009 *   encoding:   US-ASCII
00010 *   tab size:   8 (not used)
00011 *   indentation:4
00012 *
00013 *   created on: 1999oct04
00014 *   created by: Markus W. Scherer
00015 */
00016 
00017 /* set import/export definitions */
00018 #ifndef U_COMMON_IMPLEMENTATION
00019 #   define U_COMMON_IMPLEMENTATION
00020 #endif
00021 
00022 #include "unicode/utypes.h"
00023 #include "umutex.h"
00024 #include "cmemory.h"
00025 #include "cstring.h"
00026 #include "unicode/uchar.h"
00027 #include "unicode/udata.h"
00028 
00029 /* prototypes --------------------------------------------------------------- */
00030 
00031 #define DATA_NAME "unames"
00032 #define DATA_TYPE "dat"
00033 
00034 #define GROUP_SHIFT 5
00035 #define LINES_PER_GROUP (1UL<<GROUP_SHIFT)
00036 #define GROUP_MASK (LINES_PER_GROUP-1)
00037 
00038 typedef struct {
00039     uint16_t groupMSB,
00040              offsetHigh, offsetLow; /* avoid padding */
00041 } Group;
00042 
00043 typedef struct {
00044     uint32_t start, end;
00045     uint8_t type, variant;
00046     uint16_t size;
00047 } AlgorithmicRange;
00048 
00049 typedef struct {
00050     uint32_t tokenStringOffset, groupsOffset, groupStringOffset, algNamesOffset;
00051 } UCharNames;
00052 
00053 typedef struct {
00054     const char *otherName;
00055     UChar32 code;
00056 } FindName;
00057 
00058 #define DO_FIND_NAME (findNameDummy)
00059 
00060 static UDataMemory *uCharNamesData=NULL;
00061 static UCharNames *uCharNames=NULL;
00062 
00063 static UBool
00064 isDataLoaded(UErrorCode *pErrorCode);
00065 
00066 static UBool
00067 isAcceptable(void *context,
00068              const char *type, const char *name,
00069              const UDataInfo *pInfo);
00070 
00071 static Group *
00072 getGroup(UCharNames *names, uint32_t code);
00073 
00074 static uint16_t
00075 getName(UCharNames *names, uint32_t code, UCharNameChoice nameChoice,
00076         char *buffer, uint16_t bufferLength);
00077 
00078 static const uint8_t *
00079 expandGroupLengths(const uint8_t *s,
00080                    uint16_t offsets[LINES_PER_GROUP+1], uint16_t lengths[LINES_PER_GROUP+1]);
00081 
00082 static uint16_t
00083 expandGroupName(UCharNames *names, Group *group,
00084                 uint16_t lineNumber, UCharNameChoice nameChoice,
00085                 char *buffer, uint16_t bufferLength);
00086 
00087 static uint16_t
00088 expandName(UCharNames *names,
00089            const uint8_t *name, uint16_t nameLength, UCharNameChoice nameChoice,
00090            char *buffer, uint16_t bufferLength);
00091 
00092 static UBool
00093 compareName(UCharNames *names,
00094             const uint8_t *name, uint16_t nameLength, UCharNameChoice nameChoice,
00095             const char *otherName);
00096 
00097 static UBool
00098 enumGroupNames(UCharNames *names, Group *group,
00099                UChar32 start, UChar32 end,
00100                UEnumCharNamesFn *fn, void *context,
00101                UCharNameChoice nameChoice);
00102 
00103 static UBool
00104 enumNames(UCharNames *names,
00105           UChar32 start, UChar32 limit,
00106           UEnumCharNamesFn *fn, void *context,
00107           UCharNameChoice nameChoice);
00108 
00109 static uint16_t
00110 getAlgName(AlgorithmicRange *range, uint32_t code, UCharNameChoice nameChoice,
00111         char *buffer, uint16_t bufferLength);
00112 
00113 static uint16_t
00114 writeFactorSuffix(const uint16_t *factors, uint16_t count,
00115                   const char *s, /* suffix elements */
00116                   uint32_t code,
00117                   uint16_t indexes[8], /* output fields from here */
00118                   const char *elementBases[8], const char *elements[8],
00119                   char *buffer, uint16_t bufferLength);
00120 
00121 static UBool
00122 enumAlgNames(AlgorithmicRange *range,
00123              UChar32 start, UChar32 limit,
00124              UEnumCharNamesFn *fn, void *context,
00125              UCharNameChoice nameChoice);
00126 
00127 static UChar32
00128 findAlgName(AlgorithmicRange *range, UCharNameChoice nameChoice, const char *otherName);
00129 
00130 static UBool
00131 findNameDummy(void *context,
00132               UChar32 code, UCharNameChoice nameChoice,
00133               const char *name, UTextOffset length);
00134 
00135 /* public API --------------------------------------------------------------- */
00136 
00137 U_CAPI UTextOffset U_EXPORT2
00138 u_charName(UChar32 code, UCharNameChoice nameChoice,
00139            char *buffer, UTextOffset bufferLength,
00140            UErrorCode *pErrorCode) {
00141     AlgorithmicRange *algRange;
00142     uint32_t *p;
00143     uint32_t i;
00144 
00145     /* check the argument values */
00146     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
00147         return 0;
00148     } else if(nameChoice>=U_CHAR_NAME_CHOICE_COUNT || buffer==NULL) {
00149         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
00150         return 0;
00151     }
00152 
00153     if((uint32_t)code>0x10ffff) {
00154         return 0;
00155     }
00156 
00157     if(!isDataLoaded(pErrorCode)) {
00158         return 0;
00159     }
00160 
00161     /* try algorithmic names first */
00162     p=(uint32_t *)((uint8_t *)uCharNames+uCharNames->algNamesOffset);
00163     i=*p;
00164     algRange=(AlgorithmicRange *)(p+1);
00165     while(i>0) {
00166         if(algRange->start<=(uint32_t)code && (uint32_t)code<=algRange->end) {
00167             return getAlgName(algRange, (uint32_t)code, nameChoice, buffer, (uint16_t)bufferLength);
00168         }
00169         algRange=(AlgorithmicRange *)((uint8_t *)algRange+algRange->size);
00170         --i;
00171     }
00172 
00173     /* normal character name */
00174     return getName(uCharNames, (uint32_t)code, nameChoice, buffer, (uint16_t)bufferLength);
00175 }
00176 
00177 U_CAPI UChar32 U_EXPORT2
00178 u_charFromName(UCharNameChoice nameChoice,
00179                const char *name,
00180                UErrorCode *pErrorCode) {
00181     FindName findName;
00182     AlgorithmicRange *algRange;
00183     uint32_t *p;
00184     uint32_t i;
00185     UChar32 c;
00186 
00187     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
00188         return 0xffff;
00189     }
00190 
00191     if(nameChoice>=U_CHAR_NAME_CHOICE_COUNT || name==NULL || *name==0) {
00192         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
00193         return 0xffff;
00194     }
00195 
00196     if(!isDataLoaded(pErrorCode)) {
00197         return 0xffff;
00198     }
00199 
00200     /* try algorithmic names first */
00201     p=(uint32_t *)((uint8_t *)uCharNames+uCharNames->algNamesOffset);
00202     i=*p;
00203     algRange=(AlgorithmicRange *)(p+1);
00204     while(i>0) {
00205         if((c=findAlgName(algRange, nameChoice, name))!=0xffff) {
00206             return c;
00207         }
00208         algRange=(AlgorithmicRange *)((uint8_t *)algRange+algRange->size);
00209         --i;
00210     }
00211 
00212     /* normal character name */
00213     findName.otherName=name;
00214     findName.code=0xffff;
00215     enumNames(uCharNames, 0, 0x110000, DO_FIND_NAME, &findName, nameChoice);
00216     return findName.code;
00217 }
00218 
00219 U_CAPI void U_EXPORT2
00220 u_enumCharNames(UChar32 start, UChar32 limit,
00221                 UEnumCharNamesFn *fn,
00222                 void *context,
00223                 UCharNameChoice nameChoice,
00224                 UErrorCode *pErrorCode) {
00225     AlgorithmicRange *algRange;
00226     uint32_t *p;
00227     uint32_t i;
00228 
00229     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
00230         return;
00231     }
00232 
00233     if(nameChoice>=U_CHAR_NAME_CHOICE_COUNT || fn==NULL) {
00234         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
00235         return;
00236     }
00237 
00238     if((uint32_t)limit>0x110000) {
00239         limit=0x110000;
00240     }
00241     if((uint32_t)start>=(uint32_t)limit) {
00242         return;
00243     }
00244 
00245     if(!isDataLoaded(pErrorCode)) {
00246         return;
00247     }
00248 
00249     /* interleave the data-driven ones with the algorithmic ones */
00250     /* iterate over all algorithmic ranges; assume that they are in ascending order */
00251     p=(uint32_t *)((uint8_t *)uCharNames+uCharNames->algNamesOffset);
00252     i=*p;
00253     algRange=(AlgorithmicRange *)(p+1);
00254     while(i>0) {
00255         /* enumerate the character names before the current algorithmic range */
00256         /* here: start<limit */
00257         if((uint32_t)start<algRange->start) {
00258             if((uint32_t)limit<=algRange->start) {
00259                 enumNames(uCharNames, start, limit, fn, context, nameChoice);
00260                 return;
00261             }
00262             if(!enumNames(uCharNames, start, (UChar32)algRange->start, fn, context, nameChoice)) {
00263                 return;
00264             }
00265             start=(UChar32)algRange->start;
00266         }
00267         /* enumerate the character names in the current algorithmic range */
00268         /* here: algRange->start<=start<limit */
00269         if((uint32_t)start<=algRange->end) {
00270             if((uint32_t)limit<=(algRange->end+1)) {
00271                 enumAlgNames(algRange, start, limit, fn, context, nameChoice);
00272                 return;
00273             }
00274             if(!enumAlgNames(algRange, start, (UChar32)algRange->end+1, fn, context, nameChoice)) {
00275                 return;
00276             }
00277             start=(UChar32)algRange->end+1;
00278         }
00279         /* continue to the next algorithmic range (here: start<limit) */
00280         algRange=(AlgorithmicRange *)((uint8_t *)algRange+algRange->size);
00281         --i;
00282     }
00283     /* enumerate the character names after the last algorithmic range */
00284     enumNames(uCharNames, start, limit, fn, context, nameChoice);
00285 }
00286 
00287 /* implementation ----------------------------------------------------------- */
00288 
00289 static UBool
00290 isDataLoaded(UErrorCode *pErrorCode) {
00291     /* load UCharNames from file if necessary */
00292     if(uCharNames==NULL) {
00293         UCharNames *names;
00294         UDataMemory *data;
00295 
00296         /* open the data outside the mutex block */
00297         data=udata_openChoice(NULL, DATA_TYPE, DATA_NAME, isAcceptable, NULL, pErrorCode);
00298         if(U_FAILURE(*pErrorCode)) {
00299             return FALSE;
00300         }
00301 
00302         names=(UCharNames *)udata_getMemory(data);
00303 
00304         /* in the mutex block, set the data for this process */
00305         {
00306             umtx_lock(NULL);
00307             if(uCharNames==NULL) {
00308                 uCharNames=names;
00309                 uCharNamesData=data;
00310                 data=NULL;
00311                 names=NULL;
00312             }
00313             umtx_unlock(NULL);
00314         }
00315 
00316         /* if a different thread set it first, then close the extra data */
00317         if(data!=NULL) {
00318             udata_close(data); /* NULL if it was set correctly */
00319         }
00320     }
00321     return TRUE;
00322 }
00323 
00324 static UBool
00325 isAcceptable(void *context,
00326              const char *type, const char *name,
00327              const UDataInfo *pInfo) {
00328     return (UBool)(
00329         pInfo->size>=20 &&
00330         pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
00331         pInfo->charsetFamily==U_CHARSET_FAMILY &&
00332         pInfo->dataFormat[0]==0x75 &&   /* dataFormat="unam" */
00333         pInfo->dataFormat[1]==0x6e &&
00334         pInfo->dataFormat[2]==0x61 &&
00335         pInfo->dataFormat[3]==0x6d &&
00336         pInfo->formatVersion[0]==1);
00337 }
00338 
00339 /*
00340  * getGroup() does a binary search for the group that contains the
00341  * Unicode code point "code".
00342  * The return value is always a valid Group* that may contain "code"
00343  * or else is the highest group before "code".
00344  * If the lowest group is after "code", then that one is returned.
00345  */
00346 static Group *
00347 getGroup(UCharNames *names, uint32_t code) {
00348     uint16_t groupMSB=(uint16_t)(code>>GROUP_SHIFT),
00349              start=0,
00350              limit=*(uint16_t *)((char *)names+names->groupsOffset),
00351              number;
00352     Group *groups=(Group *)((char *)names+names->groupsOffset+2);
00353 
00354     /* binary search for the group of names that contains the one for code */
00355     while(start<limit-1) {
00356         number=(uint16_t)((start+limit)/2);
00357         if(groupMSB<groups[number].groupMSB) {
00358             limit=number;
00359         } else {
00360             start=number;
00361         }
00362     }
00363 
00364     /* return this regardless of whether it is an exact match */
00365     return groups+start;
00366 }
00367 
00368 static uint16_t
00369 getName(UCharNames *names, uint32_t code, UCharNameChoice nameChoice,
00370         char *buffer, uint16_t bufferLength) {
00371     Group *group=getGroup(names, code);
00372     if((uint16_t)(code>>GROUP_SHIFT)==group->groupMSB) {
00373         return expandGroupName(names, group, (uint16_t)(code&GROUP_MASK), nameChoice,
00374                                buffer, bufferLength);
00375     } else {
00376         /* group not found */
00377         /* zero-terminate */
00378         if(bufferLength>0) {
00379             *buffer=0;
00380         }
00381         return 0;
00382     }
00383 }
00384 
00385 /*
00386  * expandGroupLengths() reads a block of compressed lengths of 32 strings and
00387  * expands them into offsets and lengths for each string.
00388  * Lengths are stored with a variable-width encoding in consecutive nibbles:
00389  * If a nibble<0xc, then it is the length itself (0=empty string).
00390  * If a nibble>=0xc, then it forms a length value with the following nibble.
00391  * Calculation see below.
00392  * The offsets and lengths arrays must be at least 33 (one more) long because
00393  * there is no check here at the end if the last nibble is still used.
00394  */
00395 static const uint8_t *
00396 expandGroupLengths(const uint8_t *s,
00397                    uint16_t offsets[LINES_PER_GROUP+1], uint16_t lengths[LINES_PER_GROUP+1]) {
00398     /* read the lengths of the 32 strings in this group and get each string's offset */
00399     uint16_t i=0, offset=0, length=0;
00400     uint8_t lengthByte;
00401 
00402     /* all 32 lengths must be read to get the offset of the first group string */
00403     while(i<LINES_PER_GROUP) {
00404         lengthByte=*s++;
00405 
00406         /* read even nibble - MSBs of lengthByte */
00407         if(length>=12) {
00408             /* double-nibble length spread across two bytes */
00409             length=(uint16_t)(((length&0x3)<<4|lengthByte>>4)+12);
00410             lengthByte&=0xf;
00411         } else if((lengthByte /* &0xf0 */)>=0xc0) {
00412             /* double-nibble length spread across this one byte */
00413             length=(uint16_t)((lengthByte&0x3f)+12);
00414         } else {
00415             /* single-nibble length in MSBs */
00416             length=(uint16_t)(lengthByte>>4);
00417             lengthByte&=0xf;
00418         }
00419 
00420         *offsets++=offset;
00421         *lengths++=length;
00422 
00423         offset+=length;
00424         ++i;
00425 
00426         /* read odd nibble - LSBs of lengthByte */
00427         if((lengthByte&0xf0)==0) {
00428             /* this nibble was not consumed for a double-nibble length above */
00429             length=lengthByte;
00430             if(length<12) {
00431                 /* single-nibble length in LSBs */
00432                 *offsets++=offset;
00433                 *lengths++=length;
00434 
00435                 offset+=length;
00436                 ++i;
00437             }
00438         } else {
00439             length=0;   /* prevent double-nibble detection in the next iteration */
00440         }
00441     }
00442 
00443     /* now, s is at the first group string */
00444     return s;
00445 }
00446 
00447 static uint16_t
00448 expandGroupName(UCharNames *names, Group *group,
00449                 uint16_t lineNumber, UCharNameChoice nameChoice,
00450                 char *buffer, uint16_t bufferLength) {
00451     uint16_t offsets[LINES_PER_GROUP+2], lengths[LINES_PER_GROUP+2];
00452     const uint8_t *s=(uint8_t *)names+names->groupStringOffset+
00453                                     (group->offsetHigh<<16|group->offsetLow);
00454     s=expandGroupLengths(s, offsets, lengths);
00455     return expandName(names, s+offsets[lineNumber], lengths[lineNumber], nameChoice,
00456                       buffer, bufferLength);
00457 }
00458 
/*
 * WRITE_CHAR appends c to the output if there is capacity left and
 * always counts it in bufferPos, so that bufferPos ends up as the full
 * length of the output even when the buffer is too small.
 * buffer, bufferLength and bufferPos must be modifiable lvalues.
 * Wrapped in do { } while(0) so that "WRITE_CHAR(...);" is exactly one
 * statement, also in an unbraced if/else.
 */
#define WRITE_CHAR(buffer, bufferLength, bufferPos, c) do { \
    if((bufferLength)>0) { \
        *(buffer)++=(c); \
        --(bufferLength); \
    } \
    ++(bufferPos); \
} while(0)
00466 
00467 /*
00468  * Important: expandName() and compareName() are almost the same -
00469  * apply fixes to both.
00470  */
00471 static uint16_t
00472 expandName(UCharNames *names,
00473            const uint8_t *name, uint16_t nameLength, UCharNameChoice nameChoice,
00474            char *buffer, uint16_t bufferLength) {
00475     uint16_t *tokens=(uint16_t *)names+8;
00476     uint16_t token, tokenCount=*tokens++, bufferPos=0;
00477     uint8_t *tokenStrings=(uint8_t *)names+names->tokenStringOffset;
00478     uint8_t c;
00479 
00480     if(nameChoice!=U_UNICODE_CHAR_NAME) {
00481         /*
00482          * skip the modern name if it is not requested _and_
00483          * if the semicolon byte value is a character, not a token number
00484          */
00485         if((uint8_t)';'>=tokenCount || tokens[(uint8_t)';']==(uint16_t)(-1)) {
00486             while(nameLength>0) {
00487                 --nameLength;
00488                 if(*name++==';') {
00489                     break;
00490                 }
00491             }
00492         } else {
00493             /*
00494              * the semicolon byte value is a token number, therefore
00495              * only modern names are stored in unames.dat and there is no
00496              * such requested Unicode 1.0 name here
00497              */
00498             nameLength=0;
00499         }
00500     }
00501 
00502     /* write each letter directly, and write a token word per token */
00503     while(nameLength>0) {
00504         --nameLength;
00505         c=*name++;
00506 
00507         if(c>=tokenCount) {
00508             if(c!=';') {
00509                 /* implicit letter */
00510                 WRITE_CHAR(buffer, bufferLength, bufferPos, c);
00511             } else {
00512                 /* finished */
00513                 break;
00514             }
00515         } else {
00516             token=tokens[c];
00517             if(token==(uint16_t)(-2)) {
00518                 /* this is a lead byte for a double-byte token */
00519                 token=tokens[c<<8|*name++];
00520                 --nameLength;
00521             }
00522             if(token==(uint16_t)(-1)) {
00523                 if(c!=';') {
00524                     /* explicit letter */
00525                     WRITE_CHAR(buffer, bufferLength, bufferPos, c);
00526                 } else {
00527                     /* finished */
00528                     break;
00529                 }
00530             } else {
00531                 /* write token word */
00532                 uint8_t *tokenString=tokenStrings+token;
00533                 while((c=*tokenString++)!=0) {
00534                     WRITE_CHAR(buffer, bufferLength, bufferPos, c);
00535                 }
00536             }
00537         }
00538     }
00539 
00540     /* zero-terminate */
00541     if(bufferLength>0) {
00542         *buffer=0;
00543     }
00544 
00545     return bufferPos;
00546 }
00547 
00548 /*
00549  * compareName() is almost the same as expandName() except that it compares
00550  * the currently expanded name to an input name.
00551  * It returns the match/no match result as soon as possible.
00552  */
00553 static UBool
00554 compareName(UCharNames *names,
00555             const uint8_t *name, uint16_t nameLength, UCharNameChoice nameChoice,
00556             const char *otherName) {
00557     uint16_t *tokens=(uint16_t *)names+8;
00558     uint16_t token, tokenCount=*tokens++;
00559     uint8_t *tokenStrings=(uint8_t *)names+names->tokenStringOffset;
00560     uint8_t c;
00561 
00562     if(nameChoice!=U_UNICODE_CHAR_NAME) {
00563         /*
00564          * skip the modern name if it is not requested _and_
00565          * if the semicolon byte value is a character, not a token number
00566          */
00567         if((uint8_t)';'>=tokenCount || tokens[(uint8_t)';']==(uint16_t)(-1)) {
00568             while(nameLength>0) {
00569                 --nameLength;
00570                 if(*name++==';') {
00571                     break;
00572                 }
00573             }
00574         } else {
00575             /*
00576              * the semicolon byte value is a token number, therefore
00577              * only modern names are stored in unames.dat and there is no
00578              * such requested Unicode 1.0 name here
00579              */
00580             nameLength=0;
00581         }
00582     }
00583 
00584     /* compare each letter directly, and compare a token word per token */
00585     while(nameLength>0) {
00586         --nameLength;
00587         c=*name++;
00588 
00589         if(c>=tokenCount) {
00590             if(c!=';') {
00591                 /* implicit letter */
00592                 if((char)c!=*otherName++) {
00593                     return FALSE;
00594                 }
00595             } else {
00596                 /* finished */
00597                 break;
00598             }
00599         } else {
00600             token=tokens[c];
00601             if(token==(uint16_t)(-2)) {
00602                 /* this is a lead byte for a double-byte token */
00603                 token=tokens[c<<8|*name++];
00604                 --nameLength;
00605             }
00606             if(token==(uint16_t)(-1)) {
00607                 if(c!=';') {
00608                     /* explicit letter */
00609                     if((char)c!=*otherName++) {
00610                         return FALSE;
00611                     }
00612                 } else {
00613                     /* finished */
00614                     break;
00615                 }
00616             } else {
00617                 /* write token word */
00618                 uint8_t *tokenString=tokenStrings+token;
00619                 while((c=*tokenString++)!=0) {
00620                     if((char)c!=*otherName++) {
00621                         return FALSE;
00622                     }
00623                 }
00624             }
00625         }
00626     }
00627 
00628     /* complete match? */
00629     return (UBool)(*otherName==0);
00630 }
00631 
00632 /*
00633  * enumGroupNames() enumerates all the names in a 32-group
00634  * and either calls the enumerator function or finds a given input name.
00635  */
00636 static UBool
00637 enumGroupNames(UCharNames *names, Group *group,
00638                UChar32 start, UChar32 end,
00639                UEnumCharNamesFn *fn, void *context,
00640                UCharNameChoice nameChoice) {
00641     uint16_t offsets[LINES_PER_GROUP+2], lengths[LINES_PER_GROUP+2];
00642     const uint8_t *s=(uint8_t *)names+names->groupStringOffset+
00643                                     (group->offsetHigh<<16|group->offsetLow);
00644 
00645     s=expandGroupLengths(s, offsets, lengths);
00646     if(fn!=DO_FIND_NAME) {
00647         char buffer[200];
00648         uint16_t length;
00649 
00650         while(start<=end) {
00651             length=expandName(names, s+offsets[start&GROUP_MASK], lengths[start&GROUP_MASK], nameChoice,
00652                               buffer, sizeof(buffer));
00653             /* here, we assume that the buffer is large enough */
00654             if(length>0) {
00655                 if(!fn(context, start, nameChoice, buffer, length)) {
00656                     return FALSE;
00657                 }
00658             }
00659             ++start;
00660         }
00661     } else {
00662         const char *otherName=((FindName *)context)->otherName;
00663         while(start<=end) {
00664             if(compareName(names, s+offsets[start&GROUP_MASK], lengths[start&GROUP_MASK], nameChoice, otherName)) {
00665                 ((FindName *)context)->code=start;
00666                 return FALSE;
00667             }
00668             ++start;
00669         }
00670     }
00671     return TRUE;
00672 }
00673 
00674 static UBool
00675 enumNames(UCharNames *names,
00676           UChar32 start, UChar32 limit,
00677           UEnumCharNamesFn *fn, void *context,
00678           UCharNameChoice nameChoice) {
00679     uint16_t startGroupMSB, endGroupMSB, groupCount;
00680     Group *group, *groupLimit;
00681 
00682     startGroupMSB=(uint16_t)(start>>GROUP_SHIFT);
00683     endGroupMSB=(uint16_t)((limit-1)>>GROUP_SHIFT);
00684 
00685     /* find the group that contains start, or the highest before it */
00686     group=getGroup(names, start);
00687 
00688     if(startGroupMSB==endGroupMSB) {
00689         if(startGroupMSB==group->groupMSB) {
00690             /* if start and limit-1 are in the same group, then enumerate only in that one */
00691             return enumGroupNames(names, group, start, limit-1, fn, context, nameChoice);
00692         }
00693     } else {
00694         if(startGroupMSB==group->groupMSB) {
00695             /* enumerate characters in the partial start group */
00696             if((start&GROUP_MASK)!=0) {
00697                 if(!enumGroupNames(names, group,
00698                                    start, ((UChar32)startGroupMSB<<GROUP_SHIFT)+LINES_PER_GROUP-1,
00699                                    fn, context, nameChoice)) {
00700                     return FALSE;
00701                 }
00702             }
00703             ++group; /* continue with the next group */
00704         } else if(startGroupMSB>group->groupMSB) {
00705             /* make sure that we start enumerating with the first group after start */
00706             ++group;
00707         }
00708 
00709         /* enumerate entire groups between the start- and end-groups */
00710         groupCount=*(uint16_t *)((char *)names+names->groupsOffset);
00711         groupLimit=(Group *)((char *)names+names->groupsOffset+2)+groupCount;
00712 
00713         while(group<groupLimit && group->groupMSB<endGroupMSB) {
00714             start=(UChar32)group->groupMSB<<GROUP_SHIFT;
00715             if(!enumGroupNames(names, group, start, start+LINES_PER_GROUP-1, fn, context, nameChoice)) {
00716                 return FALSE;
00717             }
00718             ++group;
00719         }
00720 
00721         /* enumerate within the end group (group->groupMSB==endGroupMSB) */
00722         if(group<groupLimit && group->groupMSB==endGroupMSB) {
00723             return enumGroupNames(names, group, (limit-1)&~GROUP_MASK, limit-1, fn, context, nameChoice);
00724         }
00725     }
00726     return TRUE;
00727 }
00728 
00729 /*
00730  * Important:
00731  * Parts of findAlgName() are almost the same as some of getAlgName().
00732  * Fixes must be applied to both.
00733  */
00734 static uint16_t
00735 getAlgName(AlgorithmicRange *range, uint32_t code, UCharNameChoice nameChoice,
00736         char *buffer, uint16_t bufferLength) {
00737     uint16_t bufferPos=0;
00738 
00739     /*
00740      * Do not write algorithmic Unicode 1.0 names because
00741      * Unihan names are the same as the modern ones,
00742      * extension A was only introduced with Unicode 3.0, and
00743      * the Hangul syllable block was moved and changed around Unicode 1.1.5.
00744      */
00745     if(nameChoice!=U_UNICODE_CHAR_NAME) {
00746         /* zero-terminate */
00747         if(bufferLength>0) {
00748             *buffer=0;
00749         }
00750         return 0;
00751     }
00752 
00753     switch(range->type) {
00754     case 0: {
00755         /* name = prefix hex-digits */
00756         const char *s=(const char *)(range+1);
00757         char c;
00758 
00759         uint16_t i, count;
00760 
00761         /* copy prefix */
00762         while((c=*s++)!=0) {
00763             WRITE_CHAR(buffer, bufferLength, bufferPos, c);
00764         }
00765 
00766         /* write hexadecimal code point value */
00767         count=range->variant;
00768 
00769         /* zero-terminate */
00770         if(count<bufferLength) {
00771             buffer[count]=0;
00772         }
00773 
00774         for(i=count; i>0;) {
00775             if(--i<bufferLength) {
00776                 c=(char)(code&0xf);
00777                 if(c<10) {
00778                     c+='0';
00779                 } else {
00780                     c+='A'-10;
00781                 }
00782                 buffer[i]=c;
00783             }
00784             code>>=4;
00785         }
00786 
00787         bufferPos+=count;
00788         break;
00789     }
00790     case 1: {
00791         /* name = prefix factorized-elements */
00792         uint16_t indexes[8];
00793         const uint16_t *factors=(const uint16_t *)(range+1);
00794         uint16_t count=range->variant;
00795         const char *s=(const char *)(factors+count);
00796         char c;
00797 
00798         /* copy prefix */
00799         while((c=*s++)!=0) {
00800             WRITE_CHAR(buffer, bufferLength, bufferPos, c);
00801         }
00802 
00803         bufferPos+=writeFactorSuffix(factors, count,
00804                                      s, code-range->start, indexes, NULL, NULL, buffer, bufferLength);
00805         break;
00806     }
00807     default:
00808         /* undefined type */
00809         /* zero-terminate */
00810         if(bufferLength>0) {
00811             *buffer=0;
00812         }
00813         break;
00814     }
00815 
00816     return bufferPos;
00817 }
00818 
/*
 * writeFactorSuffix() writes the factorized part of an algorithmic name.
 *
 * "code" (already relative to the start of the range) is split by modulo
 * arithmetic with factors[count-1] down to factors[1] into one index per
 * factor. "s" points to the element strings: for each factor i there are
 * factors[i] consecutive zero-terminated strings, and the string number
 * indexes[i] is appended to the output.
 *
 * Output:
 *   indexes[i]      - the index calculated for factor i
 *   elementBases[i] - the first element string for factor i (if not NULL)
 *   elements[i]     - the selected element string for factor i (if not NULL)
 * Returns the full length of the suffix; the output is zero-terminated
 * only if there is room for the terminator.
 */
static uint16_t
writeFactorSuffix(const uint16_t *factors, uint16_t count,
                  const char *s, /* suffix elements */
                  uint32_t code,
                  uint16_t indexes[8], /* output fields from here */
                  const char *elementBases[8], const char *elements[8],
                  char *buffer, uint16_t bufferLength) {
    uint16_t last=(uint16_t)(count-1);  /* index of the last factor */
    uint16_t i, skip, written=0;
    char ch;

    /* determine the index for each factor, from the last one down to the second */
    for(i=last; i>0; --i) {
        uint16_t factor=factors[i];
        indexes[i]=(uint16_t)(code%factor);
        code/=factor;
    }
    /*
     * no modulo for the first factor: the remaining quotient is used
     * directly, relying on start<=code<=end keeping it within factors[0]
     */
    indexes[0]=(uint16_t)code;

    /* write one element per factor */
    for(i=0;; ++i) {
        if(elementBases!=NULL) {
            elementBases[i]=s;
        }

        /* skip indexes[i] strings to get to the selected one */
        for(skip=indexes[i]; skip>0; --skip) {
            while(*s++!=0) {}
        }
        if(elements!=NULL) {
            elements[i]=s;
        }

        /* write the selected element */
        while((ch=*s++)!=0) {
            WRITE_CHAR(buffer, bufferLength, written, ch);
        }

        /* nothing follows the last factor's element: do not skip further */
        if(i>=last) {
            break;
        }

        /* skip the remaining strings of this factor */
        for(skip=(uint16_t)(factors[i]-indexes[i]-1); skip>0; --skip) {
            while(*s++!=0) {}
        }
    }

    /* zero-terminate */
    if(bufferLength>0) {
        *buffer=0;
    }

    return written;
}
00892 
00893 /*
00894  * Important: enumAlgNames() and findAlgName() are almost the same.
00895  * Any fix must be applied to both.
00896  */
00897 static UBool
00898 enumAlgNames(AlgorithmicRange *range,
00899              UChar32 start, UChar32 limit,
00900              UEnumCharNamesFn *fn, void *context,
00901              UCharNameChoice nameChoice) {
00902     char buffer[200];
00903     uint16_t length;
00904 
00905     if(nameChoice!=U_UNICODE_CHAR_NAME) {
00906         return TRUE;
00907     }
00908 
00909     switch(range->type) {
00910     case 0: {
00911         char *s, *end;
00912         char c;
00913 
00914         /* get the full name of the start character */
00915         length=getAlgName(range, (uint32_t)start, nameChoice, buffer, sizeof(buffer));
00916         if(length<=0) {
00917             return TRUE;
00918         }
00919 
00920         /* call the enumerator function with this first character */
00921         if(!fn(context, start, nameChoice, buffer, length)) {
00922             return FALSE;
00923         }
00924 
00925         /* go to the end of the name; all these names have the same length */
00926         end=buffer;
00927         while(*end!=0) {
00928             ++end;
00929         }
00930 
00931         /* enumerate the rest of the names */
00932         while(++start<limit) {
00933             /* increment the hexadecimal number on a character-basis */
00934             s=end;
00935             for (;;) {
00936                 c=*--s;
00937                 if(('0'<=c && c<'9') || ('A'<=c && c<'F')) {
00938                     *s=c+1;
00939                     break;
00940                 } else if(c=='9') {
00941                     *s='A';
00942                     break;
00943                 } else if(c=='F') {
00944                     *s='0';
00945                 }
00946             }
00947 
00948             if(!fn(context, start, nameChoice, buffer, length)) {
00949                 return FALSE;
00950             }
00951         }
00952         break;
00953     }
00954     case 1: {
00955         uint16_t indexes[8];
00956         const char *elementBases[8], *elements[8];
00957         const uint16_t *factors=(const uint16_t *)(range+1);
00958         uint16_t count=range->variant;
00959         const char *s=(const char *)(factors+count);
00960         char *suffix, *t;
00961         uint16_t prefixLength, i, index;
00962 
00963         char c;
00964 
00965         /* name = prefix factorized-elements */
00966 
00967         /* copy prefix */
00968         suffix=buffer;
00969         prefixLength=0;
00970         while((c=*s++)!=0) {
00971             *suffix++=c;
00972             ++prefixLength;
00973         }
00974 
00975         /* append the suffix of the start character */
00976         length=prefixLength+writeFactorSuffix(factors, count,
00977                                               s, (uint32_t)start-range->start,
00978                                               indexes, elementBases, elements,
00979                                               suffix, (uint16_t)(sizeof(buffer)-prefixLength));
00980 
00981         /* call the enumerator function with this first character */
00982         if(!fn(context, start, nameChoice, buffer, length)) {
00983             return FALSE;
00984         }
00985 
00986         /* enumerate the rest of the names */
00987         while(++start<limit) {
00988             /* increment the indexes in lexical order bound by the factors */
00989             i=count;
00990             for (;;) {
00991                 index=indexes[--i]+1;
00992                 if(index<factors[i]) {
00993                     /* skip one index and its element string */
00994                     indexes[i]=index;
00995                     s=elements[i];
00996                     while(*s++!=0) {
00997                     }
00998                     elements[i]=s;
00999                     break;
01000                 } else {
01001                     /* reset this index to 0 and its element string to the first one */
01002                     indexes[i]=0;
01003                     elements[i]=elementBases[i];
01004                 }
01005             }
01006 
01007             /* to make matters a little easier, just append all elements to the suffix */
01008             t=suffix;
01009             length=prefixLength;
01010             for(i=0; i<count; ++i) {
01011                 s=elements[i];
01012                 while((c=*s++)!=0) {
01013                     *t++=c;
01014                     ++length;
01015                 }
01016             }
01017             /* zero-terminate */
01018             *t=0;
01019 
01020             if(!fn(context, start, nameChoice, buffer, length)) {
01021                 return FALSE;
01022             }
01023         }
01024         break;
01025     }
01026     default:
01027         /* undefined type */
01028         break;
01029     }
01030 
01031     return TRUE;
01032 }
01033 
01034 /*
01035  * findAlgName() is almost the same as enumAlgNames() except that it
01036  * returns the code point for a name if it fits into the range.
01037  * It returns 0xffff otherwise.
01038  */
01039 static UChar32
01040 findAlgName(AlgorithmicRange *range, UCharNameChoice nameChoice, const char *otherName) {
01041     UChar32 code;
01042 
01043     if(nameChoice!=U_UNICODE_CHAR_NAME) {
01044         return 0xffff;
01045     }
01046 
01047     switch(range->type) {
01048     case 0: {
01049         /* name = prefix hex-digits */
01050         const char *s=(const char *)(range+1);
01051         char c;
01052 
01053         uint16_t i, count;
01054 
01055         /* compare prefix */
01056         while((c=*s++)!=0) {
01057             if((char)c!=*otherName++) {
01058                 return 0xffff;
01059             }
01060         }
01061 
01062         /* read hexadecimal code point value */
01063         count=range->variant;
01064         code=0;
01065         for(i=0; i<count; ++i) {
01066             c=*otherName++;
01067             if('0'<=c && c<='9') {
01068                 code=(code<<4)|(c-'0');
01069             } else if('A'<=c && c<='F') {
01070                 code=(code<<4)|(c-'A'+10);
01071             } else {
01072                 return 0xffff;
01073             }
01074         }
01075 
01076         /* does it fit into the range? */
01077         if(*otherName==0 && range->start<=(uint32_t)code && (uint32_t)code<=range->end) {
01078             return code;
01079         }
01080         break;
01081     }
01082     case 1: {
01083         char buffer[64];
01084         uint16_t indexes[8];
01085         const char *elementBases[8], *elements[8];
01086         const uint16_t *factors=(const uint16_t *)(range+1);
01087         uint16_t count=range->variant;
01088         const char *s=(const char *)(factors+count), *t;
01089         UChar32 start, limit;
01090         uint16_t i, index;
01091 
01092         char c;
01093 
01094         /* name = prefix factorized-elements */
01095 
01096         /* compare prefix */
01097         while((c=*s++)!=0) {
01098             if((char)c!=*otherName++) {
01099                 return 0xffff;
01100             }
01101         }
01102 
01103         start=(UChar32)range->start;
01104         limit=(UChar32)(range->end+1);
01105 
01106         /* initialize the suffix elements for enumeration; indexes should all be set to 0 */
01107         writeFactorSuffix(factors, count, s, 0,
01108                           indexes, elementBases, elements, buffer, sizeof(buffer));
01109 
01110         /* compare the first suffix */
01111         if(0==uprv_strcmp(otherName, buffer)) {
01112             return start;
01113         }
01114 
01115         /* enumerate and compare the rest of the suffixes */
01116         while(++start<limit) {
01117             /* increment the indexes in lexical order bound by the factors */
01118             i=count;
01119             for (;;) {
01120                 index=indexes[--i]+1;
01121                 if(index<factors[i]) {
01122                     /* skip one index and its element string */
01123                     indexes[i]=index;
01124                     s=elements[i];
01125                     while(*s++!=0) {}
01126                     elements[i]=s;
01127                     break;
01128                 } else {
01129                     /* reset this index to 0 and its element string to the first one */
01130                     indexes[i]=0;
01131                     elements[i]=elementBases[i];
01132                 }
01133             }
01134 
01135             /* to make matters a little easier, just compare all elements of the suffix */
01136             t=otherName;
01137             for(i=0; i<count; ++i) {
01138                 s=elements[i];
01139                 while((c=*s++)!=0) {
01140                     if(c!=*t++) {
01141                         s=""; /* does not match */
01142                         i=99;
01143                     }
01144                 }
01145             }
01146             if(i<99 && *t==0) {
01147                 return start;
01148             }
01149         }
01150         break;
01151     }
01152     default:
01153         /* undefined type */
01154         break;
01155     }
01156 
01157     return 0xffff;
01158 }
01159 
01160 /* this is a dummy function that is used as a "find not enumerate" flag */
01161 static UBool
01162 findNameDummy(void *context,
01163               UChar32 code, UCharNameChoice nameChoice,
01164               const char *name, UTextOffset length) {
01165     return FALSE;
01166 }

Generated at Tue Dec 5 10:48:07 2000 for ICU by doxygen1.2.3 written by Dimitri van Heesch, © 1997-2000