Main Page   Class Hierarchy   Alphabetical List   Compound List   File List   Compound Members   File Members  

IndicReordering.h

Go to the documentation of this file.
00001 /*
00002  * @(#)IndicReordering.h        1.4 00/03/15
00003  *
00004  * (C) Copyright IBM Corp. 1998, 1999, 2000 - All Rights Reserved
00005  *
00006  */
00007 
00008 #ifndef __INDICREORDERING_H
00009 #define __INDICREORDERING_H
00010 
00011 #include "LETypes.h"
00012 #include "OpenTypeTables.h"
00013 
// Characters that are referred to by name in the code.
enum
{
    C_SIGN_ZWJ  = 0x200D,   // U+200D ZERO WIDTH JOINER
    C_SIGN_ZWNJ = 0x200C    // U+200C ZERO WIDTH NON-JOINER
};
00020 
00021 typedef LEUnicode SplitMatra[3];
00022 
00023 struct IndicClassTable
00024 {
00025     enum CharClassValues
00026     {
00027         CC_RESERVED             = 0,
00028         CC_MODIFYING_MARK_ABOVE = 1,
00029         CC_MODIFYING_MARK_POST  = 2,
00030         CC_INDEPENDENT_VOWEL    = 3,
00031         CC_CONSONANT            = 4,
00032         CC_CONSONANT_WITH_NUKTA = 5,
00033         CC_NUKTA                = 6,
00034         CC_DEPENDENT_VOWEL      = 7,
00035         CC_VIRAMA               = 8,
00036         CC_ZERO_WIDTH_MARK      = 9,
00037         CC_COUNT                = 10
00038     };
00039 
00040     enum CharClassFlags
00041     {
00042         CF_CLASS_MASK   = 0x0000FFFF,
00043 
00044         CF_CONSONANT    = 0x80000000,
00045 
00046         CF_REPH         = 0x40000000,
00047         CF_VATTU        = 0x20000000,
00048         CF_BELOW_BASE   = 0x10000000,
00049         CF_POST_BASE    = 0x08000000,
00050 
00051         CF_MATRA_PRE    = 0x04000000,
00052         CF_MATRA_BELOW  = 0x02000000,
00053         CF_MATRA_ABOVE  = 0x01000000,
00054         CF_MATRA_POST   = 0x00800000,
00055         CF_LENGTH_MARK  = 0x00400000,
00056         CF_INDEX_MASK   = 0x000F0000,
00057         CF_INDEX_SHIFT  = 16
00058     };
00059 
00060     typedef le_int32 CharClass;
00061 
00062     enum ScriptFlagBits
00063     {
00064         SF_MATRAS_AFTER_BASE    = 0x80000000,
00065         SF_REPH_AFTER_BELOW     = 0x40000000,
00066         SF_EYELASH_RA           = 0x20000000,
00067         SF_MPRE_FIXUP           = 0x10000000,
00068 
00069         SF_POST_BASE_LIMIT_MASK = 0x0000FFFF,
00070         SF_NO_POST_BASE_LIMIT   = 0x00007FFF
00071     };
00072 
00073     typedef le_int32 ScriptFlags;
00074 
00075     LEUnicode firstChar;
00076     LEUnicode lastChar;
00077     le_int32 worstCaseExpansion;
00078     ScriptFlags scriptFlags;
00079     const CharClass *classTable;
00080     const SplitMatra *splitMatraTable;
00081 
00082     le_int32 getWorstCaseExpansion() const;
00083 
00084     CharClass getCharClass(LEUnicode ch) const;
00085     const SplitMatra *getSplitMatra(CharClass charClass) const;
00086 
00087     le_bool isVMabove(LEUnicode ch) const;
00088     le_bool isVMpost(LEUnicode ch) const;
00089     le_bool isConsonant(LEUnicode ch) const;
00090     le_bool isReph(LEUnicode ch) const;
00091     le_bool isVirama(LEUnicode ch) const;
00092     le_bool isNukta(LEUnicode ch) const;
00093     le_bool isVattu(LEUnicode ch) const;
00094     le_bool isMatra(LEUnicode ch) const;
00095     le_bool isSplitMatra(LEUnicode ch) const;
00096     le_bool isMpre(LEUnicode ch) const;
00097     le_bool isMbelow(LEUnicode ch) const;
00098     le_bool isMabove(LEUnicode ch) const;
00099     le_bool isMpost(LEUnicode ch) const;
00100     le_bool isLengthMark(LEUnicode ch) const;
00101     le_bool hasPostOrBelowBaseForm(LEUnicode ch) const;
00102     le_bool hasPostBaseForm(LEUnicode ch) const;
00103     le_bool hasBelowBaseForm(LEUnicode ch) const;
00104 
00105     static le_bool isVMabove(CharClass charClass);
00106     static le_bool isVMpost(CharClass charClass);
00107     static le_bool isConsonant(CharClass charClass);
00108     static le_bool isReph(CharClass charClass);
00109     static le_bool isVirama(CharClass charClass);
00110     static le_bool isNukta(CharClass charClass);
00111     static le_bool isVattu(CharClass charClass);
00112     static le_bool isMatra(CharClass charClass);
00113     static le_bool isSplitMatra(CharClass charClass);
00114     static le_bool isMpre(CharClass charClass);
00115     static le_bool isMbelow(CharClass charClass);
00116     static le_bool isMabove(CharClass charClass);
00117     static le_bool isMpost(CharClass charClass);
00118     static le_bool isLengthMark(CharClass charClass);
00119     static le_bool hasPostOrBelowBaseForm(CharClass charClass);
00120     static le_bool hasPostBaseForm(CharClass charClass);
00121     static le_bool hasBelowBaseForm(CharClass charClass);
00122 
00123     static const IndicClassTable *getScriptClassTable(le_int32 scriptCode);
00124 };
00125 
00126 class IndicReordering
00127 {
00128 public:
00129     static le_int32 getWorstCaseExpansion(le_int32 scriptCode);
00130 
00131     static le_int32 reorder(const LEUnicode *theChars, le_int32 charCount, le_int32 scriptCode,
00132         LEUnicode *outChars, le_int32 *charIndices, const LETag **charTags);
00133 
00134     static void adjustMPres(const LEUnicode *chars, le_int32 charCount, LEGlyphID *glyphs,
00135         le_int32 *charIndices, le_int32 scriptCode);
00136 
00137 private:
00138     static le_int32 findSyllable(const IndicClassTable *classTable, const LEUnicode *chars, le_int32 prev, le_int32 charCount);
00139 
00140 };
00141 
00142 inline le_int32 IndicClassTable::getWorstCaseExpansion() const
00143 {
00144     return worstCaseExpansion;
00145 }
00146 
00147 inline const SplitMatra *IndicClassTable::getSplitMatra(CharClass charClass) const
00148 {
00149     le_int32 index = (charClass & CF_INDEX_MASK) >> CF_INDEX_SHIFT;
00150 
00151     return &splitMatraTable[index - 1];
00152 }
00153 
00154 inline le_bool IndicClassTable::isVMabove(LEUnicode ch) const
00155 {
00156     return isVMabove(getCharClass(ch));
00157 }
00158 
00159 inline le_bool IndicClassTable::isVMpost(LEUnicode ch) const
00160 {
00161     return isVMpost(getCharClass(ch));
00162 }
00163 
00164 inline le_bool IndicClassTable::isConsonant(LEUnicode ch) const
00165 {
00166     return isConsonant(getCharClass(ch));
00167 }
00168 
00169 inline le_bool IndicClassTable::isReph(LEUnicode ch) const
00170 {
00171     return isReph(getCharClass(ch));
00172 }
00173 
00174 inline le_bool IndicClassTable::isVirama(LEUnicode ch) const
00175 {
00176     return isVirama(getCharClass(ch));
00177 }
00178 
00179 inline le_bool IndicClassTable::isNukta(LEUnicode ch) const
00180 {
00181     return isNukta(getCharClass(ch));
00182 }
00183 
00184 inline le_bool IndicClassTable::isVattu(LEUnicode ch) const
00185 {
00186     return isVattu(getCharClass(ch));
00187 }
00188 
00189 inline le_bool IndicClassTable::isMatra(LEUnicode ch) const
00190 {
00191     return isMatra(getCharClass(ch));
00192 }
00193 
00194 inline le_bool IndicClassTable::isSplitMatra(LEUnicode ch) const
00195 {
00196     return isSplitMatra(getCharClass(ch));
00197 }
00198 
00199 inline le_bool IndicClassTable::isMpre(LEUnicode ch) const
00200 {
00201     return isMpre(getCharClass(ch));
00202 }
00203 
00204 inline le_bool IndicClassTable::isMbelow(LEUnicode ch) const
00205 {
00206     return isMbelow(getCharClass(ch));
00207 }
00208 
00209 inline le_bool IndicClassTable::isMabove(LEUnicode ch) const
00210 {
00211     return isMabove(getCharClass(ch));
00212 }
00213 
00214 inline le_bool IndicClassTable::isMpost(LEUnicode ch) const
00215 {
00216     return isMpost(getCharClass(ch));
00217 }
00218 
00219 inline le_bool IndicClassTable::isLengthMark(LEUnicode ch) const
00220 {
00221     return isLengthMark(getCharClass(ch));
00222 }
00223 
00224 inline le_bool IndicClassTable::hasPostOrBelowBaseForm(LEUnicode ch) const
00225 {
00226     return hasPostOrBelowBaseForm(getCharClass(ch));
00227 }
00228 
00229 inline le_bool IndicClassTable::hasPostBaseForm(LEUnicode ch) const
00230 {
00231     return hasPostBaseForm(getCharClass(ch));
00232 }
00233 
00234 inline le_bool IndicClassTable::hasBelowBaseForm(LEUnicode ch) const
00235 {
00236     return hasBelowBaseForm(getCharClass(ch));
00237 }
00238 
00239 inline le_bool IndicClassTable::isVMabove(CharClass charClass)
00240 {
00241     return (charClass & CF_CLASS_MASK) == CC_MODIFYING_MARK_ABOVE;
00242 }
00243 
00244 inline le_bool IndicClassTable::isVMpost(CharClass charClass)
00245 {
00246     return (charClass & CF_CLASS_MASK) == CC_MODIFYING_MARK_POST;
00247 }
00248 
00249 inline le_bool IndicClassTable::isConsonant(CharClass charClass)
00250 {
00251     return (charClass & CF_CONSONANT) != 0;
00252 }
00253 
00254 inline le_bool IndicClassTable::isReph(CharClass charClass)
00255 {
00256     return (charClass & CF_REPH) != 0;
00257 }
00258 
00259 inline le_bool IndicClassTable::isNukta(CharClass charClass)
00260 {
00261     return (charClass & CF_CLASS_MASK) == CC_NUKTA;
00262 }
00263 
00264 inline le_bool IndicClassTable::isVirama(CharClass charClass)
00265 {
00266     return (charClass & CF_CLASS_MASK) == CC_VIRAMA;
00267 }
00268 
00269 inline le_bool IndicClassTable::isVattu(CharClass charClass)
00270 {
00271     return (charClass & CF_VATTU) != 0;
00272 }
00273 
00274 inline le_bool IndicClassTable::isMatra(CharClass charClass)
00275 {
00276     return (charClass & CF_CLASS_MASK) == CC_DEPENDENT_VOWEL;
00277 }
00278 
00279 inline le_bool IndicClassTable::isSplitMatra(CharClass charClass)
00280 {
00281     return (charClass & CF_INDEX_MASK) != 0;
00282 }
00283 
00284 inline le_bool IndicClassTable::isMpre(CharClass charClass)
00285 {
00286     return (charClass & CF_MATRA_PRE) != 0;
00287 }
00288 
00289 inline le_bool IndicClassTable::isMbelow(CharClass charClass)
00290 {
00291     return (charClass & CF_MATRA_BELOW) != 0;
00292 }
00293 
00294 inline le_bool IndicClassTable::isMabove(CharClass charClass)
00295 {
00296     return (charClass & CF_MATRA_ABOVE) != 0;
00297 }
00298 
00299 inline le_bool IndicClassTable::isMpost(CharClass charClass)
00300 {
00301     return (charClass & CF_MATRA_POST) != 0;
00302 }
00303 
00304 inline le_bool IndicClassTable::isLengthMark(CharClass charClass)
00305 {
00306     return (charClass & CF_LENGTH_MARK) != 0;
00307 }
00308 
00309 inline le_bool IndicClassTable::hasPostOrBelowBaseForm(CharClass charClass)
00310 {
00311     return (charClass & (CF_POST_BASE | CF_BELOW_BASE)) != 0;
00312 }
00313 
00314 inline le_bool IndicClassTable::hasPostBaseForm(CharClass charClass)
00315 {
00316     return (charClass & CF_POST_BASE) != 0;
00317 }
00318 
00319 inline le_bool IndicClassTable::hasBelowBaseForm(CharClass charClass)
00320 {
00321     return (charClass & CF_BELOW_BASE) != 0;
00322 }
00323 
00324 #endif

Generated at Tue Dec 5 18:13:01 2000 for ICU by doxygen 1.2.3, written by Dimitri van Heesch, © 1997-2000