Main Page   Class Hierarchy   Alphabetical List   Compound List   File List   Compound Members  

ushape.c

00001 /*
00002 *******************************************************************************
00003 *
00004 *   Copyright (C) 2000, International Business Machines
00005 *   Corporation and others.  All Rights Reserved.
00006 *
00007 *******************************************************************************
00008 *   file name:  ushape.c
00009 *   encoding:   US-ASCII
00010 *   tab size:   8 (not used)
00011 *   indentation:4
00012 *
00013 *   created on: 2000jun29
00014 *   created by: Markus W. Scherer
00015 */
00016 
00017 #include "unicode/utypes.h"
00018 #include "unicode/uchar.h"
00019 #include "unicode/ustring.h"
00020 #include "cmemory.h"
00021 #include "unicode/ushape.h"
00022 
00023 #if UTF_SIZE<16
00024     /*
00025      * This implementation assumes that the internal encoding is UTF-16
00026      * or UTF-32, not UTF-8.
00027      * The main assumption is that the Arabic characters and their
00028      * presentation forms each fit into a single UChar.
00029      * With UTF-8, each of them occupies 2 or 3 bytes, which is more than
00030      * the single byte used by an ASCII character.
00031      */
00032 #   error This implementation assumes UTF-16 or UTF-32 (check UTF_SIZE)
00033 #endif
00034 
00035 /*
00036  * This function shapes European digits to Arabic-Indic digits
00037  * in-place, writing over the input characters.
00038  * Since we know that we are only looking for BMP code points,
00039  * we can safely just work with code units (again, at least UTF-16).
00040  */
00041 static void
00042 _shapeToArabicDigitsWithContext(UChar *s, int32_t length,
00043                                 UChar digitBase,
00044                                 UBool isLogical, UBool lastStrongWasAL) {
00045     int32_t i;
00046     UChar c;
00047 
00048     digitBase-=0x30;
00049 
00050     /* the iteration direction depends on the type of input */
00051     if(isLogical) {
00052         for(i=0; i<length; ++i) {
00053             c=s[i];
00054             switch(u_charDirection(c)) {
00055             case U_LEFT_TO_RIGHT: /* L */
00056             case U_RIGHT_TO_LEFT: /* R */
00057                 lastStrongWasAL=FALSE;
00058                 break;
00059             case U_RIGHT_TO_LEFT_ARABIC: /* AL */
00060                 lastStrongWasAL=TRUE;
00061                 break;
00062             case U_EUROPEAN_NUMBER: /* EN */
00063                 if(lastStrongWasAL && (uint32_t)(c-0x30)<10) {
00064                     s[i]=(UChar)(digitBase+c); /* digitBase+(c-0x30) - digitBase was modified above */
00065                 }
00066                 break;
00067             default :
00068                 break;
00069             }
00070         }
00071     } else {
00072         for(i=length; i>0; /* pre-decrement in the body */) {
00073             c=s[--i];
00074             switch(u_charDirection(c)) {
00075             case U_LEFT_TO_RIGHT: /* L */
00076             case U_RIGHT_TO_LEFT: /* R */
00077                 lastStrongWasAL=FALSE;
00078                 break;
00079             case U_RIGHT_TO_LEFT_ARABIC: /* AL */
00080                 lastStrongWasAL=TRUE;
00081                 break;
00082             case U_EUROPEAN_NUMBER: /* EN */
00083                 if(lastStrongWasAL && (uint32_t)(c-0x30)<10) {
00084                     s[i]=(UChar)(digitBase+c); /* digitBase+(c-0x30) - digitBase was modified above */
00085                 }
00086                 break;
00087             default :
00088                 break;
00089             }
00090         }
00091     }
00092 }
00093 
00094 U_CAPI int32_t U_EXPORT2
00095 u_shapeArabic(const UChar *source, int32_t sourceLength,
00096               UChar *dest, int32_t destSize,
00097               uint32_t options,
00098               UErrorCode *pErrorCode) {
00099     /* usual error checking */
00100     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
00101         return 0;
00102     }
00103 
00104     /* make sure that no reserved options values are used; allow dest==NULL only for preflighting */
00105     if( source==NULL || sourceLength<-1 ||
00106         (dest==NULL && destSize!=0) || destSize<0 ||
00107         options>=U_SHAPE_DIGIT_TYPE_RESERVED ||
00108         (options&U_SHAPE_LENGTH_MASK)==U_SHAPE_LENGTH_RESERVED ||
00109         (options&U_SHAPE_LETTERS_MASK)==U_SHAPE_LETTERS_RESERVED ||
00110         (options&U_SHAPE_DIGITS_MASK)>=U_SHAPE_DIGITS_RESERVED
00111     ) {
00112         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
00113         return 0;
00114     }
00115 
00116     /* determine the source length */
00117     if(sourceLength==-1) {
00118         sourceLength=u_strlen(source);
00119     }
00120     if(sourceLength==0) {
00121         return 0;
00122     }
00123 
00124     /* check that source and destination do not overlap */
00125     if( dest!=NULL &&
00126         ((source<=dest && dest<source+sourceLength) ||
00127          (dest<=source && source<dest+destSize))
00128     ) {
00129         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
00130         return 0;
00131     }
00132 
00133     if((options&U_SHAPE_LETTERS_MASK)!=U_SHAPE_LETTERS_NOOP) {
00134         /* currently, only number shaping is supported */
00135         *pErrorCode=U_UNSUPPORTED_ERROR;
00136         return 0;
00137     } else {
00138         /*
00139          * No letter shaping:
00140          * just make sure the destination is large enough and copy the string.
00141          */
00142         if(destSize<sourceLength) {
00143             /* this catches preflighting, too */
00144             *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
00145             return sourceLength;
00146         }
00147         uprv_memcpy(dest, source, sourceLength*U_SIZEOF_UCHAR);
00148         destSize=sourceLength;
00149     }
00150 
00151     /*
00152      * Perform number shaping.
00153      * With UTF-16 or UTF-32, the length of the string is constant.
00154      * The easiest way to do this is to operate on the destination and
00155      * "shape" the digits in-place.
00156      */
00157     if((options&U_SHAPE_DIGITS_MASK)!=U_SHAPE_DIGITS_NOOP) {
00158         UChar digitBase;
00159         int32_t i;
00160 
00161         /* select the requested digit group */
00162         switch(options&U_SHAPE_DIGIT_TYPE_MASK) {
00163         case U_SHAPE_DIGIT_TYPE_AN:
00164             digitBase=0x660; /* Unicode: "Arabic-Indic digits" */
00165             break;
00166         case U_SHAPE_DIGIT_TYPE_AN_EXTENDED:
00167             digitBase=0x6f0; /* Unicode: "Eastern Arabic-Indic digits (Persian and Urdu)" */
00168             break;
00169         default:
00170             /* will never occur because of validity checks above */
00171             digitBase=0;
00172             break;
00173         }
00174 
00175         /* perform the requested operation */
00176         switch(options&U_SHAPE_DIGITS_MASK) {
00177         case U_SHAPE_DIGITS_EN2AN:
00178             /* add (digitBase-'0') to each European (ASCII) digit code point */
00179             digitBase-=0x30;
00180             for(i=0; i<destSize; ++i) {
00181                 if(((uint32_t)dest[i]-0x30)<10) {
00182                     dest[i]+=digitBase;
00183                 }
00184             }
00185             break;
00186         case U_SHAPE_DIGITS_AN2EN:
00187             /* subtract (digitBase-'0') from each Arabic digit code point */
00188             for(i=0; i<destSize; ++i) {
00189                 if(((uint32_t)dest[i]-(uint32_t)digitBase)<10) {
00190                     dest[i]-=digitBase-0x30;
00191                 }
00192             }
00193             break;
00194         case U_SHAPE_DIGITS_ALEN2AN_INIT_LR:
00195             _shapeToArabicDigitsWithContext(dest, destSize,
00196                                             digitBase,
00197                                             (UBool)((options&U_SHAPE_TEXT_DIRECTION_MASK)==U_SHAPE_TEXT_DIRECTION_LOGICAL),
00198                                             FALSE);
00199             break;
00200         case U_SHAPE_DIGITS_ALEN2AN_INIT_AL:
00201             _shapeToArabicDigitsWithContext(dest, destSize,
00202                                             digitBase,
00203                                             (UBool)((options&U_SHAPE_TEXT_DIRECTION_MASK)==U_SHAPE_TEXT_DIRECTION_LOGICAL),
00204                                             TRUE);
00205             break;
00206         default:
00207             /* will never occur because of validity checks above */
00208             break;
00209         }
00210     }
00211 
00212     return destSize;
00213 }

Generated at Tue Dec 5 10:48:12 2000 for ICU by doxygen1.2.3 written by Dimitri van Heesch, © 1997-2000