Main Page   Class Hierarchy   Alphabetical List   Compound List   File List   Compound Members  

ubidi.c

00001 /*  
00002 *******************************************************************************
00003 *
00004 *   Copyright (C) 1999-2000, International Business Machines
00005 *   Corporation and others.  All Rights Reserved.
00006 *
00007 *******************************************************************************
00008 *   file name:  ubidi.c
00009 *   encoding:   US-ASCII
00010 *   tab size:   8 (not used)
00011 *   indentation:4
00012 *
00013 *   created on: 1999jul27
00014 *   created by: Markus W. Scherer
00015 */
00016 
00017 /* set import/export definitions */
00018 #ifndef U_COMMON_IMPLEMENTATION
00019 #   define U_COMMON_IMPLEMENTATION
00020 #endif
00021 
00022 #include "cmemory.h"
00023 #include "unicode/utypes.h"
00024 #include "unicode/ustring.h"
00025 #include "unicode/uchar.h"
00026 #include "unicode/ubidi.h"
00027 #include "ubidiimp.h"
00028 
00029 /*
00030  * General implementation notes:
00031  *
00032  * Throughout the implementation, there are comments like (W2) that refer to
00033  * rules of the BiDi algorithm in its version 5, in this example to the second
00034  * rule of the resolution of weak types.
00035  *
00036  * For handling surrogate pairs, where two UChar's form one "abstract" (or UTF-32)
00037  * character according to UTF-16, the second UChar gets the directional property of
00038  * the entire character assigned, while the first one gets a BN, a boundary
00039  * neutral, type, which is ignored by most of the algorithm according to
00040  * rule (X9) and the implementation suggestions of the BiDi algorithm.
00041  *
00042  * Later, adjustWSLevels() will set the level for each BN to that of the
00043  * following character (UChar), which results in surrogate pairs getting the
00044  * same level on each of their surrogates.
00045  *
00046  * In a UTF-8 implementation, the same thing could be done: the last byte of
00047  * a multi-byte sequence would get the "real" property, while all previous
00048  * bytes of that sequence would get BN.
00049  *
00050  * It is not possible to assign all those parts of a character the same real
00051  * property because this would fail in the resolution of weak types with rules
00052  * that look at immediately surrounding types.
00053  *
00054  * As a related topic, this implementation does not remove Boundary Neutral
00055  * types from the input, but ignores them wherever this is relevant.
00056  * For example, the loop for the resolution of the weak types reads
00057  * types until it finds a non-BN.
00058  * Also, explicit embedding codes are neither changed into BN nor removed.
00059  * They are only treated the same way real BNs are.
00060  * As stated before, adjustWSLevels() takes care of them at the end.
00061  * For the purpose of conformance, the levels of all these codes
00062  * do not matter.
00063  *
00064  * Note that this implementation never modifies the dirProps
00065  * after the initial setup.
00066  *
00067  *
00068  * In this implementation, the resolution of weak types (Wn),
00069  * neutrals (Nn), and the assignment of the resolved level (In)
00070  * are all done in one single loop, in resolveImplicitLevels().
00071  * Changes of dirProp values are done on the fly, without writing
00072  * them back to the dirProps array.
00073  *
00074  *
00075  * This implementation contains code that allows bypassing steps of the
00076  * algorithm that are not needed on the specific paragraph
00077  * in order to speed up the most common cases considerably,
00078  * like text that is entirely LTR, or RTL text without numbers.
00079  *
00080  * Most of this is done by setting a bit for each directional property
00081  * in a flags variable and later checking for whether there are
00082  * any LTR characters or any RTL characters, or both, whether
00083  * there are any explicit embedding codes, etc.
00084  *
00085  * If the (Xn) steps are performed, then the flags are re-evaluated,
00086  * because they will then not contain the embedding codes any more
00087  * and will be adjusted for override codes, so that subsequently
00088  * more bypassing may be possible than what the initial flags suggested.
00089  *
00090  * If the text is not mixed-directional, then the
00091  * algorithm steps for the weak type resolution are not performed,
00092  * and all levels are set to the paragraph level.
00093  *
00094  * If there are no explicit embedding codes, then the (Xn) steps
00095  * are not performed.
00096  *
00097  * If embedding levels are supplied as a parameter, then all
00098  * explicit embedding codes are ignored, and the (Xn) steps
00099  * are not performed.
00100  *
00101  * White Space types could get the level of the run they belong to,
00102  * and are checked with a test of (flags&MASK_EMBEDDING) to
00103  * decide whether the paragraph direction must be added to
00104  * the flags variable.
00105  *
00106  * If there are no White Space types in the paragraph, then
00107  * (L1) is not necessary in adjustWSLevels().
00108  */
00109 
00110 /* prototypes --------------------------------------------------------------- */
00111 
00112 static void
00113 getDirProps(UBiDi *pBiDi, const UChar *text);
00114 
00115 static UBiDiDirection
00116 resolveExplicitLevels(UBiDi *pBiDi);
00117 
00118 static UBiDiDirection
00119 checkExplicitLevels(UBiDi *pBiDi, UErrorCode *pErrorCode);
00120 
00121 static UBiDiDirection
00122 directionFromFlags(Flags flags);
00123 
00124 static void
00125 resolveImplicitLevels(UBiDi *pBiDi,
00126                       UTextOffset start, UTextOffset limit,
00127                       DirProp sor, DirProp eor);
00128 
00129 static void
00130 adjustWSLevels(UBiDi *pBiDi);
00131 
00132 /* to avoid some conditional statements, use tiny constant arrays */
00133 static Flags flagLR[2]={ DIRPROP_FLAG(L), DIRPROP_FLAG(R) };
00134 static Flags flagE[2]={ DIRPROP_FLAG(LRE), DIRPROP_FLAG(RLE) };
00135 static Flags flagO[2]={ DIRPROP_FLAG(LRO), DIRPROP_FLAG(RLO) };
00136 
00137 #define DIRPROP_FLAG_LR(level) flagLR[(level)&1]
00138 #define DIRPROP_FLAG_E(level) flagE[(level)&1]
00139 #define DIRPROP_FLAG_O(level) flagO[(level)&1]
00140 
00141 /* UBiDi object management -------------------------------------------------- */
00142 
00143 U_CAPI UBiDi * U_EXPORT2
00144 ubidi_open(void) 
00145 {
00146     UErrorCode errorCode=U_ZERO_ERROR;
00147     return ubidi_openSized(0, 0, &errorCode);
00148 }
00149 
00150 U_CAPI UBiDi * U_EXPORT2
00151 ubidi_openSized(UTextOffset maxLength, UTextOffset maxRunCount, UErrorCode *pErrorCode) {
00152     UBiDi *pBiDi;
00153 
00154     /* check the argument values */
00155     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
00156         return NULL;
00157     } else if(maxLength<0 || maxRunCount<0) {
00158         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
00159         return NULL;    /* invalid arguments */
00160     }
00161 
00162     /* allocate memory for the object */
00163     pBiDi=(UBiDi *)uprv_malloc(sizeof(UBiDi));
00164     if(pBiDi==NULL) {
00165         *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
00166         return NULL;
00167     }
00168 
00169     /* reset the object, all pointers NULL, all flags FALSE, all sizes 0 */
00170     uprv_memset(pBiDi, 0, sizeof(UBiDi));
00171 
00172     /* allocate memory for arrays as requested */
00173     if(maxLength>0) {
00174         if( !getInitialDirPropsMemory(pBiDi, maxLength) ||
00175             !getInitialLevelsMemory(pBiDi, maxLength)
00176         ) {
00177             *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
00178         }
00179     } else {
00180         pBiDi->mayAllocateText=TRUE;
00181     }
00182 
00183     if(maxRunCount>0) {
00184         if(maxRunCount==1) {
00185             /* use simpleRuns[] */
00186             pBiDi->runsSize=sizeof(Run);
00187         } else if(!getInitialRunsMemory(pBiDi, maxRunCount)) {
00188             *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
00189         }
00190     } else {
00191         pBiDi->mayAllocateRuns=TRUE;
00192     }
00193 
00194     if(U_SUCCESS(*pErrorCode)) {
00195         return pBiDi;
00196     } else {
00197         ubidi_close(pBiDi);
00198         return NULL;
00199     }
00200 }
00201 
00202 /*
00203  * We are allowed to allocate memory if memory==NULL or
00204  * mayAllocate==TRUE for each array that we need.
00205  * We also try to grow and shrink memory as needed if we
00206  * allocate it.
00207  *
00208  * Assume sizeNeeded>0.
00209  * If *pMemory!=NULL, then assume *pSize>0.
00210  *
00211  * ### this realloc() may unnecessarily copy the old data,
00212  * which we know we don't need any more;
00213  * is this the best way to do this??
00214  */
00215 extern UBool
00216 getMemory(void **pMemory, UTextOffset *pSize, UBool mayAllocate, UTextOffset sizeNeeded) {
00217     /* check for existing memory */
00218     if(*pMemory==NULL) {
00219         /* we need to allocate memory */
00220         if(mayAllocate && (*pMemory=uprv_malloc(sizeNeeded))!=NULL) {
00221             *pSize=sizeNeeded;
00222             return TRUE;
00223         } else {
00224             return FALSE;
00225         }
00226     } else {
00227         /* there is some memory, is it enough or too much? */
00228         if(sizeNeeded>*pSize && !mayAllocate) {
00229             /* not enough memory, and we must not allocate */
00230             return FALSE;
00231         } else if(sizeNeeded!=*pSize && mayAllocate) {
00232             /* we may try to grow or shrink */
00233             void *memory;
00234 
00235             if((memory=uprv_realloc(*pMemory, sizeNeeded))!=NULL) {
00236                 *pMemory=memory;
00237                 *pSize=sizeNeeded;
00238                 return TRUE;
00239             } else {
00240                 /* we failed to grow */
00241                 return FALSE;
00242             }
00243         } else {
00244             /* we have at least enough memory and must not allocate */
00245             return TRUE;
00246         }
00247     }
00248 }
00249 
00250 U_CAPI void U_EXPORT2
00251 ubidi_close(UBiDi *pBiDi) {
00252     if(pBiDi!=NULL) {
00253         if(pBiDi->dirPropsMemory!=NULL) {
00254             uprv_free(pBiDi->dirPropsMemory);
00255         }
00256         if(pBiDi->levelsMemory!=NULL) {
00257             uprv_free(pBiDi->levelsMemory);
00258         }
00259         if(pBiDi->runsMemory!=NULL) {
00260             uprv_free(pBiDi->runsMemory);
00261         }
00262         uprv_free(pBiDi);
00263     }
00264 }
00265 
00266 /* set to approximate "inverse BiDi" ---------------------------------------- */
00267 
00268 U_CAPI void U_EXPORT2
00269 ubidi_setInverse(UBiDi *pBiDi, UBool isInverse) {
00270     if(pBiDi!=NULL) {
00271         pBiDi->isInverse=isInverse;
00272     }
00273 }
00274 
00275 U_CAPI UBool U_EXPORT2
00276 ubidi_isInverse(UBiDi *pBiDi) {
00277     if(pBiDi!=NULL) {
00278         return pBiDi->isInverse;
00279     } else {
00280         return FALSE;
00281     }
00282 }
00283 
00284 /* ubidi_setPara ------------------------------------------------------------ */
00285 
00286 U_CAPI void U_EXPORT2
00287 ubidi_setPara(UBiDi *pBiDi, const UChar *text, UTextOffset length,
00288               UBiDiLevel paraLevel, UBiDiLevel *embeddingLevels,
00289               UErrorCode *pErrorCode) {
00290     UBiDiDirection direction;
00291 
00292     /* check the argument values */
00293     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
00294         return;
00295     } else if(pBiDi==NULL || text==NULL ||
00296               ((UBIDI_MAX_EXPLICIT_LEVEL<paraLevel) && !IS_DEFAULT_LEVEL(paraLevel)) ||
00297               length<-1
00298     ) {
00299         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
00300         return;
00301     }
00302 
00303     if(length==-1) {
00304         length=u_strlen(text);
00305     }
00306 
00307     /* initialize the UBiDi structure */
00308     pBiDi->text=text;
00309     pBiDi->length=length;
00310     pBiDi->paraLevel=paraLevel;
00311     pBiDi->direction=UBIDI_LTR;
00312     pBiDi->trailingWSStart=length;  /* the levels[] will reflect the WS run */
00313 
00314     pBiDi->dirProps=NULL;
00315     pBiDi->levels=NULL;
00316     pBiDi->runs=NULL;
00317 
00318     if(length==0) {
00319         /*
00320          * For an empty paragraph, create a UBiDi object with the paraLevel and
00321          * the flags and the direction set but without allocating zero-length arrays.
00322          * There is nothing more to do.
00323          */
00324         if(IS_DEFAULT_LEVEL(paraLevel)) {
00325             pBiDi->paraLevel&=1;
00326         }
00327         if(paraLevel&1) {
00328             pBiDi->flags=DIRPROP_FLAG(R);
00329             pBiDi->direction=UBIDI_RTL;
00330         } else {
00331             pBiDi->flags=DIRPROP_FLAG(L);
00332             pBiDi->direction=UBIDI_LTR;
00333         }
00334 
00335         pBiDi->runCount=0;
00336         return;
00337     }
00338 
00339     pBiDi->runCount=-1;
00340 
00341     /*
00342      * Get the directional properties,
00343      * the flags bit-set, and
00344      * determine the partagraph level if necessary.
00345      */
00346     if(getDirPropsMemory(pBiDi, length)) {
00347         pBiDi->dirProps=pBiDi->dirPropsMemory;
00348         getDirProps(pBiDi, text);
00349     } else {
00350         *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
00351         return;
00352     }
00353 
00354     /* are explicit levels specified? */
00355     if(embeddingLevels==NULL) {
00356         /* no: determine explicit levels according to the (Xn) rules */\
00357         if(getLevelsMemory(pBiDi, length)) {
00358             pBiDi->levels=pBiDi->levelsMemory;
00359             direction=resolveExplicitLevels(pBiDi);
00360         } else {
00361             *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
00362             return;
00363         }
00364     } else {
00365         /* set BN for all explicit codes, check that all levels are paraLevel..UBIDI_MAX_EXPLICIT_LEVEL */
00366         pBiDi->levels=embeddingLevels;
00367         direction=checkExplicitLevels(pBiDi, pErrorCode);
00368         if(U_FAILURE(*pErrorCode)) {
00369             return;
00370         }
00371     }
00372 
00373     /*
00374      * The steps after (X9) in the UBiDi algorithm are performed only if
00375      * the paragraph text has mixed directionality!
00376      */
00377     pBiDi->direction=direction;
00378     switch(direction) {
00379     case UBIDI_LTR:
00380         /* make sure paraLevel is even */
00381         pBiDi->paraLevel=(UBiDiLevel)((pBiDi->paraLevel+1)&~1);
00382 
00383         /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */
00384         pBiDi->trailingWSStart=0;
00385         break;
00386     case UBIDI_RTL:
00387         /* make sure paraLevel is odd */
00388         pBiDi->paraLevel|=1;
00389 
00390         /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */
00391         pBiDi->trailingWSStart=0;
00392         break;
00393     default:
00394         /*
00395          * If there are no external levels specified and there
00396          * are no significant explicit level codes in the text,
00397          * then we can treat the entire paragraph as one run.
00398          * Otherwise, we need to perform the following rules on runs of
00399          * the text with the same embedding levels. (X10)
00400          * "Significant" explicit level codes are ones that actually
00401          * affect non-BN characters.
00402          * Examples for "insignificant" ones are empty embeddings
00403          * LRE-PDF, LRE-RLE-PDF-PDF, etc.
00404          */
00405         if(embeddingLevels==NULL && !(pBiDi->flags&DIRPROP_FLAG_MULTI_RUNS)) {
00406             resolveImplicitLevels(pBiDi, 0, length,
00407                                     GET_LR_FROM_LEVEL(pBiDi->paraLevel),
00408                                     GET_LR_FROM_LEVEL(pBiDi->paraLevel));
00409         } else {
00410             /* sor, eor: start and end types of same-level-run */
00411             UBiDiLevel *levels=pBiDi->levels;
00412             UTextOffset start, limit=0;
00413             UBiDiLevel level, nextLevel;
00414             DirProp sor, eor;
00415 
00416             /* determine the first sor and set eor to it because of the loop body (sor=eor there) */
00417             level=pBiDi->paraLevel;
00418             nextLevel=levels[0];
00419             if(level<nextLevel) {
00420                 eor=GET_LR_FROM_LEVEL(nextLevel);
00421             } else {
00422                 eor=GET_LR_FROM_LEVEL(level);
00423             }
00424 
00425             do {
00426                 /* determine start and limit of the run (end points just behind the run) */
00427 
00428                 /* the values for this run's start are the same as for the previous run's end */
00429                 sor=eor;
00430                 start=limit;
00431                 level=nextLevel;
00432 
00433                 /* search for the limit of this run */
00434                 while(++limit<length && levels[limit]==level) {}
00435 
00436                 /* get the correct level of the next run */
00437                 if(limit<length) {
00438                     nextLevel=levels[limit];
00439                 } else {
00440                     nextLevel=pBiDi->paraLevel;
00441                 }
00442 
00443                 /* determine eor from max(level, nextLevel); sor is last run's eor */
00444                 if((level&~UBIDI_LEVEL_OVERRIDE)<(nextLevel&~UBIDI_LEVEL_OVERRIDE)) {
00445                     eor=GET_LR_FROM_LEVEL(nextLevel);
00446                 } else {
00447                     eor=GET_LR_FROM_LEVEL(level);
00448                 }
00449 
00450                 /* if the run consists of overridden directional types, then there
00451                    are no implicit types to be resolved */
00452                 if(!(level&UBIDI_LEVEL_OVERRIDE)) {
00453                     resolveImplicitLevels(pBiDi, start, limit, sor, eor);
00454                 } else {
00455                     /* remove the UBIDI_LEVEL_OVERRIDE flags */
00456                     do {
00457                         levels[start++]&=~UBIDI_LEVEL_OVERRIDE;
00458                     } while(start<limit);
00459                 }
00460             } while(limit<length);
00461         }
00462 
00463         /* reset the embedding levels for some non-graphic characters (L1), (X9) */
00464         adjustWSLevels(pBiDi);
00465 
00466         /* for "inverse BiDi", ubidi_getRuns() modifies the levels of numeric runs following RTL runs */
00467         if(pBiDi->isInverse) {
00468             if(!ubidi_getRuns(pBiDi)) {
00469                 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
00470                 return;
00471             }
00472         }
00473         break;
00474     }
00475 }
00476 
00477 /* perform (P2)..(P3) ------------------------------------------------------- */
00478 
00479 /*
00480  * Get the directional properties for the text,
00481  * calculate the flags bit-set, and
00482  * determine the partagraph level if necessary.
00483  */
00484 static void
00485 getDirProps(UBiDi *pBiDi, const UChar *text) {
00486     DirProp *dirProps=pBiDi->dirPropsMemory;    /* pBiDi->dirProps is const */
00487 
00488     UTextOffset i=0, i0, i1, length=pBiDi->length;
00489     Flags flags=0;      /* collect all directionalities in the text */
00490     UChar32 uchar;
00491     DirProp dirProp;
00492 
00493     if(IS_DEFAULT_LEVEL(pBiDi->paraLevel)) {
00494         /* determine the paragraph level (P2..P3) */
00495         for(;;) {
00496             i0=i;           /* index of first code unit */
00497             UTF_NEXT_CHAR(text, i, length, uchar);
00498             i1=i-1;         /* index of last code unit, gets the directional property */
00499             flags|=DIRPROP_FLAG(dirProps[i1]=dirProp=u_charDirection(uchar));
00500             if(i1>i0) {     /* set previous code units' properties to BN */
00501                 flags|=DIRPROP_FLAG(BN);
00502                 do {
00503                     dirProps[--i1]=BN;
00504                 } while(i1>i0);
00505             }
00506 
00507             if(dirProp==L) {
00508                 pBiDi->paraLevel=0;
00509                 break;
00510             } else if(dirProp==R || dirProp==AL) {
00511                 pBiDi->paraLevel=1;
00512                 break;
00513             } else if(i>=length) {
00514                 /*
00515                  * see comment in ubidi.h:
00516                  * the DEFAULT_XXX values are designed so that
00517                  * their bit 0 alone yields the intended default
00518                  */
00519                 pBiDi->paraLevel&=1;
00520                 break;
00521             }
00522         }
00523     }
00524 
00525     /* get the rest of the directional properties and the flags bits */
00526     while(i<length) {
00527         i0=i;           /* index of first code unit */
00528         UTF_NEXT_CHAR(text, i, length, uchar);
00529         i1=i-1;         /* index of last code unit, gets the directional property */
00530         flags|=DIRPROP_FLAG(dirProps[i1]=dirProp=u_charDirection(uchar));
00531         if(i1>i0) {     /* set previous code units' properties to BN */
00532             flags|=DIRPROP_FLAG(BN);
00533             do {
00534                 dirProps[--i1]=BN;
00535             } while(i1>i0);
00536         }
00537     }
00538     if(flags&MASK_EMBEDDING) {
00539         flags|=DIRPROP_FLAG_LR(pBiDi->paraLevel);
00540     }
00541 
00542     pBiDi->flags=flags;
00543 }
00544 
00545 /* perform (X1)..(X9) ------------------------------------------------------- */
00546 
00547 /*
00548  * Resolve the explicit levels as specified by explicit embedding codes.
00549  * Recalculate the flags to have them reflect the real properties
00550  * after taking the explicit embeddings into account.
00551  *
00552  * The BiDi algorithm is designed to result in the same behavior whether embedding
00553  * levels are externally specified (from "styled text", supposedly the preferred
00554  * method) or set by explicit embedding codes (LRx, RLx, PDF) in the plain text.
00555  * That is why (X9) instructs to remove all explicit codes (and BN).
00556  * However, in a real implementation, this removal of these codes and their index
00557  * positions in the plain text is undesirable since it would result in
00558  * reallocated, reindexed text.
00559  * Instead, this implementation leaves the codes in there and just ignores them
00560  * in the subsequent processing.
00561  * In order to get the same reordering behavior, positions with a BN or an
00562  * explicit embedding code just get the same level assigned as the last "real"
00563  * character.
00564  *
00565  * Some implementations, not this one, then overwrite some of these
00566  * directionality properties at "real" same-level-run boundaries by
00567  * L or R codes so that the resolution of weak types can be performed on the
00568  * entire paragraph at once instead of having to parse it once more and
00569  * perform that resolution on same-level-runs.
00570  * This limits the scope of the implicit rules in effectively
00571  * the same way as the run limits.
00572  *
00573  * Instead, this implementation does not modify these codes.
00574  * On one hand, the paragraph has to be scanned for same-level-runs, but
00575  * on the other hand, this saves another loop to reset these codes,
00576  * or saves making and modifying a copy of dirProps[].
00577  *
00578  *
00579  * Note that (Pn) and (Xn) changed significantly from version 4 of the BiDi algorithm.
00580  *
00581  *
00582  * Handling the stack of explicit levels (Xn):
00583  *
00584  * With the BiDi stack of explicit levels,
00585  * as pushed with each LRE, RLE, LRO, and RLO and popped with each PDF,
00586  * the explicit level must never exceed UBIDI_MAX_EXPLICIT_LEVEL==61.
00587  *
00588  * In order to have a correct push-pop semantics even in the case of overflows,
00589  * there are two overflow counters:
00590  * - countOver60 is incremented with each LRx at level 60
00591  * - from level 60, one RLx increases the level to 61
00592  * - countOver61 is incremented with each LRx and RLx at level 61
00593  *
00594  * Popping levels with PDF must work in the opposite order so that level 61
00595  * is correct at the correct point. Underflows (too many PDFs) must be checked.
00596  *
00597  * This implementation assumes that UBIDI_MAX_EXPLICIT_LEVEL is odd.
00598  */
00599 
00600 static UBiDiDirection
00601 resolveExplicitLevels(UBiDi *pBiDi) {
00602     const DirProp *dirProps=pBiDi->dirProps;
00603     UBiDiLevel *levels=pBiDi->levels;
00604     
00605     UTextOffset i=0, length=pBiDi->length;
00606     Flags flags=pBiDi->flags;       /* collect all directionalities in the text */
00607     DirProp dirProp;
00608     UBiDiLevel level=pBiDi->paraLevel;
00609 
00610     UBiDiDirection direction;
00611 
00612     /* determine if the text is mixed-directional or single-directional */
00613     direction=directionFromFlags(flags);
00614 
00615     /* we may not need to resolve any explicit levels */
00616     if(direction!=UBIDI_MIXED) {
00617         /* not mixed directionality: levels don't matter - trailingWSStart will be 0 */
00618     } else if(!(flags&MASK_EXPLICIT) || pBiDi->isInverse) {
00619         /* mixed, but all characters are at the same embedding level */
00620         /* or we are in "inverse BiDi" */
00621         /* set all levels to the paragraph level */
00622         for(i=0; i<length; ++i) {
00623             levels[i]=level;
00624         }
00625     } else {
00626         /* continue to perform (Xn) */
00627 
00628         /* (X1) level is set for all codes, embeddingLevel keeps track of the push/pop operations */
00629         /* both variables may carry the UBIDI_LEVEL_OVERRIDE flag to indicate the override status */
00630         UBiDiLevel embeddingLevel=level, newLevel, stackTop=0;
00631 
00632         UBiDiLevel stack[UBIDI_MAX_EXPLICIT_LEVEL];        /* we never push anything >=UBIDI_MAX_EXPLICIT_LEVEL */
00633         uint32_t countOver60=0, countOver61=0;  /* count overflows of explicit levels */
00634 
00635         /* recalculate the flags */
00636         flags=0;
00637 
00638         /* since we assume that this is a single paragraph, we ignore (X8) */
00639         for(i=0; i<length; ++i) {
00640             dirProp=dirProps[i];
00641             switch(dirProp) {
00642             case LRE:
00643             case LRO:
00644                 /* (X3, X5) */
00645                 newLevel=(UBiDiLevel)((embeddingLevel+2)&~(UBIDI_LEVEL_OVERRIDE|1)); /* least greater even level */
00646                 if(newLevel<=UBIDI_MAX_EXPLICIT_LEVEL) {
00647                     stack[stackTop]=embeddingLevel;
00648                     ++stackTop;
00649                     embeddingLevel=newLevel;
00650                     if(dirProp==LRO) {
00651                         embeddingLevel|=UBIDI_LEVEL_OVERRIDE;
00652                     } else {
00653                         embeddingLevel&=~UBIDI_LEVEL_OVERRIDE;
00654                     }
00655                 } else if((embeddingLevel&~UBIDI_LEVEL_OVERRIDE)==UBIDI_MAX_EXPLICIT_LEVEL) {
00656                     ++countOver61;
00657                 } else /* (embeddingLevel&~UBIDI_LEVEL_OVERRIDE)==UBIDI_MAX_EXPLICIT_LEVEL-1 */ {
00658                     ++countOver60;
00659                 }
00660                 flags|=DIRPROP_FLAG(BN);
00661                 break;
00662             case RLE:
00663             case RLO:
00664                 /* (X2, X4) */
00665                 newLevel=(UBiDiLevel)(((embeddingLevel&~UBIDI_LEVEL_OVERRIDE)+1)|1); /* least greater odd level */
00666                 if(newLevel<=UBIDI_MAX_EXPLICIT_LEVEL) {
00667                     stack[stackTop]=embeddingLevel;
00668                     ++stackTop;
00669                     embeddingLevel=newLevel;
00670                     if(dirProp==RLO) {
00671                         embeddingLevel|=UBIDI_LEVEL_OVERRIDE;
00672                     } else {
00673                         embeddingLevel&=~UBIDI_LEVEL_OVERRIDE;
00674                     }
00675                 } else {
00676                     ++countOver61;
00677                 }
00678                 flags|=DIRPROP_FLAG(BN);
00679                 break;
00680             case PDF:
00681                 /* (X7) */
00682                 /* handle all the overflow cases first */
00683                 if(countOver61>0) {
00684                     --countOver61;
00685                 } else if(countOver60>0 && (embeddingLevel&~UBIDI_LEVEL_OVERRIDE)!=UBIDI_MAX_EXPLICIT_LEVEL) {
00686                     /* handle LRx overflows from level 60 */
00687                     --countOver60;
00688                 } else if(stackTop>0) {
00689                     /* this is the pop operation; it also pops level 61 while countOver60>0 */
00690                     --stackTop;
00691                     embeddingLevel=stack[stackTop];
00692                 /* } else { (underflow) */
00693                 }
00694                 flags|=DIRPROP_FLAG(BN);
00695                 break;
00696             case B:
00697                 /*
00698                  * We do not really expect to see a paragraph separator (B),
00699                  * but we should do something reasonable with it,
00700                  * especially at the end of the text.
00701                  */
00702                 stackTop=0;
00703                 countOver60=countOver61=0;
00704                 embeddingLevel=level=pBiDi->paraLevel;
00705                 flags|=DIRPROP_FLAG(B);
00706                 break;
00707             case BN:
00708                 /* BN, LRE, RLE, and PDF are supposed to be removed (X9) */
00709                 /* they will get their levels set correctly in adjustWSLevels() */
00710                 flags|=DIRPROP_FLAG(BN);
00711                 break;
00712             default:
00713                 /* all other types get the "real" level */
00714                 if(level!=embeddingLevel) {
00715                     level=embeddingLevel;
00716                     if(level&UBIDI_LEVEL_OVERRIDE) {
00717                         flags|=DIRPROP_FLAG_O(level)|DIRPROP_FLAG_MULTI_RUNS;
00718                     } else {
00719                         flags|=DIRPROP_FLAG_E(level)|DIRPROP_FLAG_MULTI_RUNS;
00720                     }
00721                 }
00722                 if(!(level&UBIDI_LEVEL_OVERRIDE)) {
00723                     flags|=DIRPROP_FLAG(dirProp);
00724                 }
00725                 break;
00726             }
00727 
00728             /*
00729              * We need to set reasonable levels even on BN codes and
00730              * explicit codes because we will later look at same-level runs (X10).
00731              */
00732             levels[i]=level;
00733         }
00734         if(flags&MASK_EMBEDDING) {
00735             flags|=DIRPROP_FLAG_LR(pBiDi->paraLevel);
00736         }
00737 
00738         /* subsequently, ignore the explicit codes and BN (X9) */
00739 
00740         /* again, determine if the text is mixed-directional or single-directional */
00741         pBiDi->flags=flags;
00742         direction=directionFromFlags(flags);
00743     }
00744     return direction;
00745 }
00746 
00747 /*
00748  * Use a pre-specified embedding levels array:
00749  *
00750  * Adjust the directional properties for overrides (->LEVEL_OVERRIDE),
00751  * ignore all explicit codes (X9),
00752  * and check all the preset levels.
00753  *
00754  * Recalculate the flags to have them reflect the real properties
00755  * after taking the explicit embeddings into account.
00756  */
00757 static UBiDiDirection
00758 checkExplicitLevels(UBiDi *pBiDi, UErrorCode *pErrorCode) {
00759     const DirProp *dirProps=pBiDi->dirProps;
00760     UBiDiLevel *levels=pBiDi->levels;
00761     
00762     UTextOffset i, length=pBiDi->length;
00763     Flags flags=0;  /* collect all directionalities in the text */
00764     UBiDiLevel level, paraLevel=pBiDi->paraLevel;
00765 
00766     for(i=0; i<length; ++i) {
00767         level=levels[i];
00768         if(level&UBIDI_LEVEL_OVERRIDE) {
00769             /* keep the override flag in levels[i] but adjust the flags */
00770             level&=~UBIDI_LEVEL_OVERRIDE;     /* make the range check below simpler */
00771             flags|=DIRPROP_FLAG_O(level);
00772         } else {
00773             /* set the flags */
00774             flags|=DIRPROP_FLAG_E(level)|DIRPROP_FLAG(dirProps[i]);
00775         }
00776         if(level<paraLevel || UBIDI_MAX_EXPLICIT_LEVEL<level) {
00777             /* level out of bounds */
00778             *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
00779             return UBIDI_LTR;
00780         }
00781     }
00782     if(flags&MASK_EMBEDDING) {
00783         flags|=DIRPROP_FLAG_LR(pBiDi->paraLevel);
00784     }
00785 
00786     /* determine if the text is mixed-directional or single-directional */
00787     pBiDi->flags=flags;
00788     return directionFromFlags(flags);
00789 }
00790 
00791 /* determine if the text is mixed-directional or single-directional */
00792 static UBiDiDirection
00793 directionFromFlags(Flags flags) {
00794     /* if the text contains AN and neutrals, then some neutrals may become RTL */
00795     if(!(flags&MASK_RTL || ((flags&DIRPROP_FLAG(AN)) && (flags&MASK_POSSIBLE_N)))) {
00796         return UBIDI_LTR;
00797     } else if(!(flags&MASK_LTR)) {
00798         return UBIDI_RTL;
00799     } else {
00800         return UBIDI_MIXED;
00801     }
00802 }
00803 
00804 /* perform rules (Wn), (Nn), and (In) on a run of the text ------------------ */
00805 
00806 /*
00807  * This implementation of the (Wn) rules applies all rules in one pass.
00808  * In order to do so, it needs a look-ahead of typically 1 character
00809  * (except for W5: sequences of ET) and keeps track of changes
00810  * in a rule Wp that affect a later Wq (p<q).
00811  *
00812  * historyOfEN is a variable-saver: it contains 4 boolean states;
00813  * a bit in it set to 1 means:
00814  *  bit 0: the current code is an EN after W2
00815  *  bit 1: the current code is an EN after W4
00816  *  bit 2: the previous code was an EN after W2
00817  *  bit 3: the previous code was an EN after W4
00818  * In other words, b0..1 have transitions of EN in the current iteration,
00819  * while b2..3 have the transitions of EN in the previous iteration.
00820  * A simple historyOfEN<<=2 suffices for the propagation.
00821  *
00822  * The (Nn) and (In) rules are also performed in that same single loop,
00823  * but effectively one iteration behind for white space.
00824  *
00825  * Since all implicit rules are performed in one step, it is not necessary
00826  * to actually store the intermediate directional properties in dirProps[].
00827  */
00828 
/* number of bits by which historyOfEN is shifted per iteration */
#define EN_SHIFT 2
/* bit 0: the current code is an EN after (W2) */
#define EN_AFTER_W2 1
/* bit 1: the current code is an EN after (W4) */
#define EN_AFTER_W4 2
/* both "current" bits together */
#define EN_ALL (EN_AFTER_W2|EN_AFTER_W4)
/* bits 2 and 3: the same two states, one iteration (one shift) later */
#define PREV_EN_AFTER_W2 (EN_AFTER_W2<<EN_SHIFT)
#define PREV_EN_AFTER_W4 (EN_AFTER_W4<<EN_SHIFT)
00835 
/*
 * Resolve the implicit levels - rules (W1..W7), (N1..N2) and (In) - for the
 * characters in [start, limit[ and write the results into pBiDi->levels[].
 *
 * pBiDi  supplies dirProps[] (only read here), levels[] (modified in place)
 *        and the isInverse flag
 * start  index of the first character of the run; the code relies on
 *        start<limit
 * limit  index just behind the last character of the run
 * sor    directional property that stands in for the character before start
 * eor    directional property that stands in for the character at limit
 *
 * According to the comments in the body, the caller passes an entire
 * same-level run, so that all levels[start..limit-1] are equal on entry.
 */
00836 static void
00837 resolveImplicitLevels(UBiDi *pBiDi,
00838                       UTextOffset start, UTextOffset limit,
00839                       DirProp sor, DirProp eor) {
00840     const DirProp *dirProps=pBiDi->dirProps;
00841     UBiDiLevel *levels=pBiDi->levels;
00842 
          /* neutralStart<0 means: currently not inside a sequence of neutrals */
00843     UTextOffset i, next, neutralStart=-1;
00844     DirProp prevDirProp, dirProp, nextDirProp, lastStrong, beforeNeutral=L;
          /* level that is assigned to EN and AN, see below */
00845     UBiDiLevel numberLevel;
00846     uint8_t historyOfEN;
00847 
00848     /* initialize: current at sor, next at start (it is start<limit) */
00849     next=start;
00850     dirProp=lastStrong=sor;
00851     nextDirProp=dirProps[next];
00852     historyOfEN=0;
00853 
00854     if(pBiDi->isInverse) {
00855         /*
00856          * For "inverse BiDi", we set the levels of numbers just like for
00857          * regular L characters, plus a flag that ubidi_getRuns() will use
00858          * to set a similar flag on the corresponding output run.
               *
               * That is: an even run level is kept, an odd one is raised by 1.
00859          */
00860         numberLevel=levels[start];
00861         if(numberLevel&1) {
00862             ++numberLevel;
00863         }
00864     } else {
00865         /* normal BiDi: least greater even level */
00866         numberLevel=(UBiDiLevel)((levels[start]+2)&~1);
00867     }
00868 
00869     /*
00870      * In all steps of this implementation, BN and explicit embedding codes
00871      * must be treated as if they didn't exist (X9).
00872      * They will get levels set before a non-neutral character, and remain
00873      * undefined before a neutral one, but adjustWSLevels() will take care
00874      * of all of them.
00875      */
          /* skip leading BN/explicit codes; if nothing else is left, next==limit */
00876     while(DIRPROP_FLAG(nextDirProp)&MASK_BN_EXPLICIT) {
00877         if(++next<limit) {
00878             nextDirProp=dirProps[next];
00879         } else {
00880             nextDirProp=eor;
00881             break;
00882         }
00883     }
00884 
00885     /*
00886      * Note: at the end of this file, there is a prototype
00887      * of a version of this function that uses a statetable
00888      * at the core of this state machine.
00889      * If you make changes to this state machine,
00890      * please update that prototype as well.
00891      */
00892 
00893     /* loop for entire run */
00894     while(next<limit) {
00895         /* advance */
              /*
               * prevDirProp takes the already resolved property of the previous
               * character; i is the index of the current character, and next
               * moves on to the following character that is not BN/explicit
               * (or to limit, in which case nextDirProp becomes eor).
               */
00896         prevDirProp=dirProp;
00897         dirProp=nextDirProp;
00898         i=next;
00899         do {
00900             if(++next<limit) {
00901                 nextDirProp=dirProps[next];
00902             } else {
00903                 nextDirProp=eor;
00904                 break;
00905             }
00906         } while(DIRPROP_FLAG(nextDirProp)&MASK_BN_EXPLICIT);
              /* the "current EN" bits of the last iteration become the "previous EN" bits */
00907         historyOfEN<<=EN_SHIFT;
00908 
00909         /* (W1..W7) */
00910         switch(dirProp) {
00911         case L:
00912             lastStrong=L;
00913             break;
00914         case R:
00915             lastStrong=R;
00916             break;
00917         case AL:
00918             /* (W3) */
00919             lastStrong=AL;
00920             dirProp=R;
00921             break;
00922         case EN:
00923             /* we have to set historyOfEN correctly */
00924             if(lastStrong==AL) {
00925                 /* (W2) */
00926                 dirProp=AN;
00927             } else {
00928                 if(lastStrong==L) {
00929                     /* (W7) */
00930                     dirProp=L;
00931                 }
00932                 /* this EN stays after (W2) and (W4) - at least before (W7) */
00933                 historyOfEN|=EN_ALL;
00934             }
00935             break;
00936         case ES:
00937             if( historyOfEN&PREV_EN_AFTER_W2 &&     /* previous was EN before (W4) */
00938                 nextDirProp==EN && lastStrong!=AL   /* next is EN and (W2) won't make it AN */
00939             ) {
00940                 /* (W4) */
00941                 if(lastStrong!=L) {
00942                     dirProp=EN;
00943                 } else {
00944                     /* (W7) */
00945                     dirProp=L;
00946                 }
00947                 historyOfEN|=EN_AFTER_W4;
00948             } else {
00949                 /* (W6) */
00950                 dirProp=ON;
00951             }
00952             break;
00953         case CS:
00954             if( historyOfEN&PREV_EN_AFTER_W2 &&     /* previous was EN before (W4) */
00955                 nextDirProp==EN && lastStrong!=AL   /* next is EN and (W2) won't make it AN */
00956             ) {
00957                 /* (W4) */
00958                 if(lastStrong!=L) {
00959                     dirProp=EN;
00960                 } else {
00961                     /* (W7) */
00962                     dirProp=L;
00963                 }
00964                 historyOfEN|=EN_AFTER_W4;
00965             } else if(prevDirProp==AN &&                    /* previous was AN */
00966                       (nextDirProp==AN ||                   /* next is AN */
00967                       (nextDirProp==EN && lastStrong==AL))  /* or (W2) will make it one */
00968             ) {
00969                 /* (W4) */
00970                 dirProp=AN;
00971             } else {
00972                 /* (W6) */
00973                 dirProp=ON;
00974             }
00975             break;
00976         case ET:
00977             /* get sequence of ET; advance only next, not current, previous or historyOfEN */
00978             if(next<limit) {
00979                 while(DIRPROP_FLAG(nextDirProp)&MASK_ET_NSM_BN /* (W1), (X9) */) {
00980                     if(++next<limit) {
00981                         nextDirProp=dirProps[next];
00982                     } else {
00983                         nextDirProp=eor;
00984                         break;
00985                     }
00986                 }
00987             }
00988 
00989             /* now process the sequence of ET like a single ET */
00990             if((historyOfEN&PREV_EN_AFTER_W4) ||     /* previous was EN before (W5) */
00991                 (nextDirProp==EN && lastStrong!=AL)   /* next is EN and (W2) won't make it AN */
00992             ) {
00993                 /* (W5) */
00994                 if(lastStrong!=L) {
00995                     dirProp=EN;
00996                 } else {
00997                     /* (W7) */
00998                     dirProp=L;
00999                 }
01000             } else {
01001                 /* (W6) */
01002                 dirProp=ON;
01003             }
01004 
01005             /* apply the result of (W1), (W5)..(W7) to the entire sequence of ET */
01006             break;
01007         case NSM:
01008             /* (W1) */
01009             dirProp=prevDirProp;
01010             /* set historyOfEN back to prevDirProp's historyOfEN */
                  /* (this undoes the shift that was done before the switch) */
01011             historyOfEN>>=EN_SHIFT;
01012             /*
01013              * Technically, this should be done before the switch() in the form
01014              *      if(nextDirProp==NSM) {
01015              *          dirProps[next]=nextDirProp=dirProp;
01016              *      }
01017              *
01018              * - effectively one iteration ahead.
01019              * However, whether the next dirProp is NSM or is equal to the current dirProp
01020              * does not change the outcome of any condition in (W2)..(W7).
01021              */
01022             break;
01023         default:
01024             break;
01025         }
01026 
01027         /* here, it is always [prev,this,next]dirProp!=BN; it may be next>i+1 */
01028 
01029         /* perform (Nn) - here, only L, R, EN, AN, and neutrals are left */
01030         /* for "inverse BiDi", treat neutrals like L */
01031         /* this is one iteration late for the neutrals */
01032         if(DIRPROP_FLAG(dirProp)&MASK_N) {
01033             if(neutralStart<0) {
01034                 /* start of a sequence of neutrals */
                      /* only remembered here; levels are set when the sequence ends */
01035                 neutralStart=i;
01036                 beforeNeutral=prevDirProp;
01037             }
01038         } else /* not a neutral, can be only one of { L, R, EN, AN } */ {
01039             /*
01040              * Note that all levels[] values are still the same at this
01041              * point because this function is called for an entire
01042              * same-level run.
01043              * Therefore, we need to read only one actual level.
01044              */
01045             UBiDiLevel level=levels[i];
01046 
01047             if(neutralStart>=0) {
                      /* only the parity (bit 0) of final is used below */
01048                 UBiDiLevel final;
01049                 /* end of a sequence of neutrals (dirProp is "afterNeutral") */
01050                 if(!(pBiDi->isInverse)) {
01051                     if(beforeNeutral==L) {
01052                         if(dirProp==L) {
01053                             final=0;                /* make all neutrals L (N1) */
01054                         } else {
01055                             final=level;            /* make all neutrals "e" (N2) */
01056                         }
01057                     } else /* beforeNeutral is one of { R, EN, AN } */ {
01058                         if(dirProp==L) {
01059                             final=level;            /* make all neutrals "e" (N2) */
01060                         } else {
01061                             final=1;                /* make all neutrals R (N1) */
01062                         }
01063                     }
01064                 } else {
01065                     /* "inverse BiDi": collapse [before]dirProps L, EN, AN into L */
01066                     if(beforeNeutral!=R) {
01067                         if(dirProp!=R) {
01068                             final=0;                /* make all neutrals L (N1) */
01069                         } else {
01070                             final=level;            /* make all neutrals "e" (N2) */
01071                         }
01072                     } else /* beforeNeutral is R */ {
01073                         if(dirProp!=R) {
01074                             final=level;            /* make all neutrals "e" (N2) */
01075                         } else {
01076                             final=1;                /* make all neutrals R (N1) */
01077                         }
01078                     }
01079                 }
01080                 /* perform (In) on the sequence of neutrals */
01081                 if((level^final)&1) {
01082                     /* do something only if we need to _change_ the level */
                          /* raising each level by 1 flips its parity; covers [neutralStart, i[ */
01083                     do {
01084                         ++levels[neutralStart];
01085                     } while(++neutralStart<i);
01086                 }
01087                 neutralStart=-1;
01088             }
01089 
01090             /* perform (In) on the non-neutral character */
01091             /*
01092              * in the cases of (W5), processing a sequence of ET,
01093              * and of (X9), skipping BN,
01094              * there may be multiple characters from i to <next
01095              * that all get (virtually) the same dirProp and (really) the same level
01096              */
01097             if(dirProp==L) {
01098                 if(level&1) {
01099                     ++level;
01100                 } else {
01101                     i=next;     /* we keep the levels */
01102                 }
01103             } else if(dirProp==R) {
01104                 if(!(level&1)) {
01105                     ++level;
01106                 } else {
01107                     i=next;     /* we keep the levels */
01108                 }
01109             } else /* EN or AN */ {
01110                 /* this level depends on whether we do "inverse BiDi" */
01111                 level=numberLevel;
01112             }
01113 
01114             /* apply the new level to the sequence, if necessary */
                  /* (with i==next from above, this loop does not execute) */
01115             while(i<next) {
01116                 levels[i++]=level;
01117             }
01118         }
01119     }
01120 
01121     /* perform (Nn) - here,
01122        the character after the neutrals is eor, which is either L or R */
01123     /* this is one iteration late for the neutrals */
01124     if(neutralStart>=0) {
01125         /*
01126          * Note that all levels[] values are still the same at this
01127          * point because this function is called for an entire
01128          * same-level run.
01129          * Therefore, we need to read only one actual level.
01130          */
              /* as above, only the parity (bit 0) of final is used */
01131         UBiDiLevel level=levels[neutralStart], final;
01132 
01133         /* end of a sequence of neutrals (eor is "afterNeutral") */
01134         if(!(pBiDi->isInverse)) {
01135             if(beforeNeutral==L) {
01136                 if(eor==L) {
01137                     final=0;                /* make all neutrals L (N1) */
01138                 } else {
01139                     final=level;            /* make all neutrals "e" (N2) */
01140                 }
01141             } else /* beforeNeutral is one of { R, EN, AN } */ {
01142                 if(eor==L) {
01143                     final=level;            /* make all neutrals "e" (N2) */
01144                 } else {
01145                     final=1;                /* make all neutrals R (N1) */
01146                 }
01147             }
01148         } else {
01149             /* "inverse BiDi": collapse [before]dirProps L, EN, AN into L */
01150             if(beforeNeutral!=R) {
01151                 if(eor!=R) {
01152                     final=0;                /* make all neutrals L (N1) */
01153                 } else {
01154                     final=level;            /* make all neutrals "e" (N2) */
01155                 }
01156             } else /* beforeNeutral is R */ {
01157                 if(eor!=R) {
01158                     final=level;            /* make all neutrals "e" (N2) */
01159                 } else {
01160                     final=1;                /* make all neutrals R (N1) */
01161                 }
01162             }
01163         }
01164         /* perform (In) on the sequence of neutrals */
01165         if((level^final)&1) {
01166             /* do something only if we need to _change_ the level */
                  /* the trailing neutrals extend to the end of the run: [neutralStart, limit[ */
01167             do {
01168                 ++levels[neutralStart];
01169             } while(++neutralStart<limit);
01170         }
01171     }
01172 }
01173 
01174 /* perform (L1) and (X9) ---------------------------------------------------- */
01175 
01176 /*
01177  * Reset the embedding levels for some non-graphic characters (L1).
01178  * This function also sets appropriate levels for BN, and
01179  * explicit embedding types that are supposed to have been removed
01180  * from the paragraph in (X9).
01181  */
01182 static void
01183 adjustWSLevels(UBiDi *pBiDi) {
01184     const DirProp *dirProps=pBiDi->dirProps;
01185     UBiDiLevel *levels=pBiDi->levels;
01186     UTextOffset i;
01187 
01188     if(pBiDi->flags&MASK_WS) {
01189         UBiDiLevel paraLevel=pBiDi->paraLevel;
01190         Flags flag;
01191 
01192         i=pBiDi->trailingWSStart;
01193         while(i>0) {
01194             /* reset a sequence of WS/BN before eop and B/S to the paragraph paraLevel */
01195             while(i>0 && DIRPROP_FLAG(dirProps[--i])&MASK_WS) {
01196                 levels[i]=paraLevel;
01197             }
01198 
01199             /* reset BN to the next character's paraLevel until B/S, which restarts above loop */
01200             /* here, i+1 is guaranteed to be <length */
01201             while(i>0) {
01202                 flag=DIRPROP_FLAG(dirProps[--i]);
01203                 if(flag&MASK_BN_EXPLICIT) {
01204                     levels[i]=levels[i+1];
01205                 } else if(flag&MASK_B_S) {
01206                     levels[i]=paraLevel;
01207                     break;
01208                 }
01209             }
01210         }
01211     }
01212 }
01213 
01214 /* -------------------------------------------------------------------------- */
01215 
01216 U_CAPI UBiDiDirection U_EXPORT2
01217 ubidi_getDirection(const UBiDi *pBiDi) {
01218     if(pBiDi!=NULL) {
01219         return pBiDi->direction;
01220     } else {
01221         return UBIDI_LTR;
01222     }
01223 }
01224 
01225 U_CAPI const UChar * U_EXPORT2
01226 ubidi_getText(const UBiDi *pBiDi) {
01227     if(pBiDi!=NULL) {
01228         return pBiDi->text;
01229     } else {
01230         return NULL;
01231     }
01232 }
01233 
01234 U_CAPI UTextOffset U_EXPORT2
01235 ubidi_getLength(const UBiDi *pBiDi) {
01236     if(pBiDi!=NULL) {
01237         return pBiDi->length;
01238     } else {
01239         return 0;
01240     }
01241 }
01242 
01243 U_CAPI UBiDiLevel U_EXPORT2
01244 ubidi_getParaLevel(const UBiDi *pBiDi) {
01245     if(pBiDi!=NULL) {
01246         return pBiDi->paraLevel;
01247     } else {
01248         return 0;
01249     }
01250 }
01251 
01252 /* statetable prototype ----------------------------------------------------- */
01253 
01254 /*
01255  * This is here for possible future
01256  * performance work and is not compiled right now.
01257  */
01258 
01259 #if 0
01260 /*
01261  * This is a piece of code that could be part of ubidi.c/resolveImplicitLevels().
01262  * It replaces in the (Wn) state machine the switch()-if()-cascade with
01263  * just a few if()s and a state table.
       *
       * NOTE(review): this is an uncompiled sketch. It contains "..." placeholders,
       * uses FLAG() where the compiled code in this file uses DIRPROP_FLAG(),
       * and omits the local variable declarations and the (Nn)/(In) parts.
01264  */
01265 
01266 /* use the state table only for the following dirProp's */
      /*
       * NOTE(review): this mask contains FLAG(ET), so in the function below the
       * "else if(dirProp==ET)" branch cannot be reached as written - confirm
       * whether ET was meant to be left out of the mask.
       */
01267 #define MASK_W_TABLE (FLAG(L)|FLAG(R)|FLAG(AL)|FLAG(EN)|FLAG(ES)|FLAG(CS)|FLAG(ET)|FLAG(AN))
01268 
01269 /*
01270  * inputs:
01271  *
01272  * 0..1 historyOfEN - 2b
01273  * 2    prevDirProp==AN - 1b
01274  * 3..4 lastStrong, one of { L, R, AL, none } - 2b
01275  * 5..7 dirProp, one of { L, R, AL, EN, ES, CS, ET, AN } - 3b
01276  * 8..9 nextDirProp, one of { EN, AN, other }
01277  *
01278  * total: 10b=1024 states
01279  */
      /*
       * NOTE(review): _OTHER has the value 8 and does not fit into the 3-bit
       * dirProp field or the 2-bit lastStrong field described above;
       * the layout calls the fourth lastStrong value "none" - TODO confirm
       * the intended encoding.
       */
01280 enum { _L, _R, _AL, _EN, _ES, _CS, _ET, _AN, _OTHER };  /* lastStrong, dirProp */
01281 enum { __EN, __AN, __OTHER };                           /* nextDirProp */
01282 
      /* bit positions of the input fields listed above */
01283 #define LAST_STRONG_SHIFT 3
01284 #define DIR_PROP_SHIFT 5
01285 #define NEXT_DIR_PROP_SHIFT 8
01286 
01287 /* masks after shifting */
      /* STATE_MASK keeps bits 0..4: historyOfEN, prevDirProp==AN and lastStrong */
01288 #define LAST_STRONG_MASK 3
01289 #define DIR_PROP_MASK 7
01290 #define STATE_MASK 0x1f
01291 
01292 /* convert dirProp into _dirProp (above enum) */
      /*
       * NOTE(review): the entries are shown pre-shifted by DIR_PROP_SHIFT and
       * NEXT_DIR_PROP_SHIFT; the latter reaches bits 8..9, so the element type
       * must be wide enough for that - verify DirProp's width.
       */
01293 static DirProp inputDirProp[dirPropCount]={ _X<<DIR_PROP_SHIFT, ... };
01294 
01295 /* convert dirProp into __dirProp (above enum) */
01296 static DirProp inputNextDirProp[dirPropCount]={ __X<<NEXT_DIR_PROP_SHIFT, ... };
01297 
01298 /*
01299  * outputs:
01300  *
01301  * dirProp, one of { L, R, EN, AN, ON } - 3b
01302  *
01303  * 0..1 historyOfEN - 2b
01304  * 2    prevDirProp==AN - 1b
01305  * 3..4 lastStrong, one of { L, R, AL, none } - 2b
01306  * 5..7 new dirProp, one of { L, R, EN, AN, ON }
01307  *
01308  * total: 8 bits=1 byte per state
01309  */
01310 enum { ___L, ___R, ___EN, ___AN, ___ON, ___count };
01311 
01312 /* convert ___dirProp into dirProp (above enum) */
01313 static DirProp outputDirProp[___count]={ X, ... };
01314 
01315 /* state table */
      /* indexed by the 10-bit input described above; each entry is one output byte */
01316 static uint8_t wnTable[1024]={ /* calculate with switch()-if()-cascade */ };
01317 
      /*
       * Sketch of resolveImplicitLevels() with the (Wn) rules driven by wnTable[].
       *
       * NOTE(review): the parameter list uses BiDi/Index and names the upper
       * bound "end", while the body uses "limit" like the compiled version;
       * the sketch would need the compiled version's types and names.
       */
01318 static void
01319 resolveImplicitLevels(BiDi *pBiDi,
01320                       Index start, Index end,
01321                       DirProp sor, DirProp eor) {
01322     /* new variable */
01323     uint8_t state;
01324 
01325     /* remove variable lastStrong */
01326 
01327     /* set initial state (set lastStrong, the rest is 0) */
01328     state= sor==L ? 0 : _R<<LAST_STRONG_SHIFT;
01329 
01330     while(next<limit) {
01331         /* advance */
01332         prevDirProp=dirProp;
01333         dirProp=nextDirProp;
01334         i=next;
01335         do {
01336             if(++next<limit) {
01337                 nextDirProp=dirProps[next];
01338             } else {
01339                 nextDirProp=eor;
01340                 break;
01341             }
01342         } while(FLAG(nextDirProp)&MASK_BN_EXPLICIT);
01343 
01344         /* (W1..W7) */
01345         /* ### This may be more efficient with a switch(dirProp). */
01346         if(FLAG(dirProp)&MASK_W_TABLE) {
                  /* look up the new state; its bits 5..7 select the resolved dirProp */
01347             state=wnTable[
01348                     ((int)state)|
01349                     inputDirProp[dirProp]|
01350                     inputNextDirProp[nextDirProp]
01351             ];
01352             dirProp=outputDirProp[state>>DIR_PROP_SHIFT];
                  /* keep only the bits that carry over into the next iteration */
01353             state&=STATE_MASK;
01354         } else if(dirProp==ET) {
01355             /* get sequence of ET; advance only next, not current, previous or historyOfEN */
01356             while(next<limit && FLAG(nextDirProp)&MASK_ET_NSM_BN /* (W1), (X9) */) {
01357                 if(++next<limit) {
01358                     nextDirProp=dirProps[next];
01359                 } else {
01360                     nextDirProp=eor;
01361                     break;
01362                 }
01363             }
01364 
01365             state=wnTable[
01366                     ((int)state)|
01367                     _ET<<DIR_PROP_SHIFT|
01368                     inputNextDirProp[nextDirProp]
01369             ];
01370             dirProp=outputDirProp[state>>DIR_PROP_SHIFT];
01371             state&=STATE_MASK;
01372 
01373             /* apply the result of (W1), (W5)..(W7) to the entire sequence of ET */
01374         } else if(dirProp==NSM) {
01375             /* (W1) */
01376             dirProp=prevDirProp;
01377             /* keep prevDirProp's EN and AN states! */
01378         } else /* other */ {
01379             /* set EN and AN states to 0 */
                  /* (only the lastStrong field, bits 3..4, survives) */
01380             state&=LAST_STRONG_MASK<<LAST_STRONG_SHIFT;
01381         }
01382 
01383         /* perform (Nn) and (In) as usual */
01384     }
01385     /* perform (Nn) and (In) as usual */
01386 }
01387 #endif

Generated at Tue Dec 5 10:47:54 2000 for ICU by doxygen1.2.3 written by Dimitri van Heesch, © 1997-2000