Main Page   Class Hierarchy   Compound List   File List   Header Files   Sources   Compound Members   File Members  

normlzr.h

00001 /*
00002  ********************************************************************
00003  * COPYRIGHT: 
00004  * Copyright (c) 1996-1999, International Business Machines Corporation and
00005  * others. All Rights Reserved.
00006  ********************************************************************
00007  */
00008 
00009 #ifndef NORMLZR_H
00010 #define NORMLZR_H
00011 
00012 #include "unicode/utypes.h"
00013 #include "unicode/unistr.h"
00014 #include "unicode/chariter.h"
00015 
00016 /* forward declaration */
00017 class ComposedCharIter;
00018 
00106 class U_COMMON_API Normalizer
00107 {
00108   // Declares constructors, static normalize/compose/decompose helpers, iteration methods and mode/option/text accessors; only declarations appear here.
00109  public:
00110   // Bit flags that are OR-ed together to build the EMode values declared below.
00111   enum {
00112     COMPAT_BIT         = 1,   // set in COMPOSE_COMPAT and DECOMP_COMPAT
00113     DECOMP_BIT         = 2,   // set in DECOMP and DECOMP_COMPAT
00114     COMPOSE_BIT     = 4      // set in COMPOSE and COMPOSE_COMPAT
00115   };
00116 
00117 
00118   // Sentinel constant. NOTE(review): presumably what the iteration methods return once the text is exhausted -- confirm in the implementation.
00120   enum {
00121       DONE=0xffff
00122   };
00123   // Modes accepted by the constructors, normalize() and setMode(); each value is a combination of the *_BIT flags above.
00125   enum EMode {
00126 
00139     NO_OP         = 0,   // no mode bits set
00140     
00155     COMPOSE         = COMPOSE_BIT,   // == 4
00156     
00171     COMPOSE_COMPAT     = COMPOSE_BIT | COMPAT_BIT,   // == 5
00172     
00187     DECOMP         = DECOMP_BIT,   // == 2
00188     
00203     DECOMP_COMPAT     = DECOMP_BIT | COMPAT_BIT   // == 3
00204   };
00205   // Option flags. NOTE(review): presumably the bits carried by the opt/option/options parameters and by setOption()/getOption() -- confirm.
00207   enum {
00208 
00226     IGNORE_HANGUL     = 0x001
00227   };
00228 
00229   // Constructors
00230   // Over a UnicodeString, with a mode and no options argument.
00241   Normalizer(const UnicodeString& str, 
00242          EMode mode);
00243   // Over a UnicodeString, with a mode and an int32_t options word.
00262   Normalizer(const UnicodeString& str, 
00263          EMode mode, 
00264          int32_t opt);
00265   // Over a UChar buffer of the given length, with a mode.
00278   Normalizer(const UChar* str,
00279          int32_t length,
00280          EMode mode);
00281   // Over a UChar buffer of the given length, with a mode and an int32_t option word.
00298   Normalizer(const UChar* str,
00299          int32_t length,
00300          EMode mode,
00301                  int32_t option);
00302   // Over a CharacterIterator (taken by const reference), with a mode.
00314   Normalizer(const CharacterIterator& iter, 
00315          EMode mode);
00316   // Over a CharacterIterator (taken by const reference), with a mode and an int32_t options word.
00332   Normalizer(const CharacterIterator& iter, 
00333          EMode mode, 
00334          int32_t opt);
00335   // Copy constructor.
00340   Normalizer(const Normalizer& copy);
00341   // Destructor (declared non-virtual).
00346   ~Normalizer();
00347   
00348   
00349   //-------------------------------------------------------------------------
00350   // Static utility methods
00351   //-------------------------------------------------------------------------
00352   // Normalizes source according to mode and options. Returns void; result and status are non-const references, presumably the output string and error code -- confirm.
00373   static void normalize(const UnicodeString& source, 
00374             EMode mode, 
00375             int32_t options,
00376             UnicodeString& result, 
00377             UErrorCode &status);
00378   // Composition entry point. NOTE(review): compat presumably selects the compatibility variant (cf. COMPAT_BIT) -- confirm.
00402   static void compose(const UnicodeString& source, 
00403               UBool compat,
00404               int32_t options,
00405               UnicodeString& result, 
00406               UErrorCode &status);
00407   // Decomposition entry point; takes the same parameter list as compose().
00434   static void decompose(const UnicodeString& source, 
00435             UBool compat,
00436             int32_t options,
00437             UnicodeString& result, 
00438             UErrorCode &status);
00439 
00440 
00441   //-------------------------------------------------------------------------
00442   // CharacterIterator-style iteration methods (this class declares no base class, so these are not literal overrides)
00443   //-------------------------------------------------------------------------
00444   // current() is the only const method among current/first/last/next/previous; all five return UChar32.
00449   UChar32              current(void) const;
00450   
00456   UChar32              first(void);
00457 
00464   UChar32              last(void);
00465   
00472   UChar32              next(void);
00473   
00480   UChar32              previous(void);
00481   // Takes a UTextOffset and returns a UChar32.
00499   UChar32              setIndex(UTextOffset index);
00500   
00509   void                reset(void);
00510   // Const accessors returning UTextOffset: getIndex(), startIndex(), endIndex().
00525   UTextOffset            getIndex(void) const;
00526   
00533   UTextOffset            startIndex(void) const;
00534   
00541   UTextOffset            endIndex(void) const;
00542   
00543   // Equality against another Normalizer; operator!= is defined inline after the class as the negation of operator==.
00549   //  virtual UBool    operator==(const CharacterIterator& that) const;
00550   UBool        operator==(const Normalizer& that) const;
00551   inline UBool        operator!=(const Normalizer& that) const;
00552   // Returns a Normalizer*. NOTE(review): presumably a newly allocated copy that the caller owns -- confirm.
00558   Normalizer*        clone(void) const;
00559   // Returns an int32_t hash value.
00564   int32_t                hashCode(void) const;
00565 
00566   //-------------------------------------------------------------------------
00567   // Property access methods
00568   //-------------------------------------------------------------------------
00569   // Mode accessors: setMode() takes an EMode, getMode() returns one.
00596   void setMode(EMode newMode);
00597   
00604   EMode getMode(void) const;
00605   // setOption() takes an option value plus a UBool; getOption() takes the option and returns a UBool.
00629   void setOption(int32_t option, 
00630          UBool value);
00631   
00638   UBool getOption(int32_t option) const;
00639   // setText() overloads mirror the constructor inputs: UnicodeString, CharacterIterator, or UChar buffer + length; each also takes a UErrorCode&.
00645   void setText(const UnicodeString& newText, 
00646            UErrorCode &status);
00647   
00653   void setText(const CharacterIterator& newText, 
00654            UErrorCode &status);
00655   
00661   void setText(const UChar* newText,
00662                     int32_t length,
00663             UErrorCode &status);
00670   void            getText(UnicodeString&  result);
00671   // getText() overloads: one takes a UnicodeString& result; the other returns const UChar* and takes an int32_t& count (presumably receiving the length -- confirm). Neither is const.
00677   const UChar*     getText(int32_t&  count);
00678 
00679 private:
00680   // Private utility methods for iteration
00681   // For documentation, see the source code
00682   UChar nextCompose(void);
00683   UChar prevCompose(void);
00684   UChar nextDecomp(void);
00685   UChar prevDecomp(void);
00686   // Note: the helpers above and below return UChar, while the public iteration methods return UChar32.
00687   UChar curForward(void);
00688   UChar curBackward(void);
00689   // init() takes a CharacterIterator pointer (not a reference) together with a mode and an option word.
00690   void    init(CharacterIterator* iter, 
00691          EMode mode, 
00692          int32_t option);
00693   void    initBuffer(void);
00694   void    clearBuffer(void);
00695   
00696   // Utilities used by Compose
00697   static void        bubbleAppend(UnicodeString& target, 
00698                      UChar ch, 
00699                      uint32_t cclass);
00700   static uint32_t     getComposeClass(UChar ch);
00701   static uint16_t    composeLookup(UChar ch);
00702   static uint16_t    composeAction(uint16_t baseIndex, 
00703                       uint16_t comIndex);
00704   static void        explode(UnicodeString& target, 
00705                 uint16_t index);
00706   static UChar    pairExplode(UnicodeString& target, 
00707                     uint16_t action);
00708   
00709   // Utilities used by Decompose
00710   static void        fixCanonical(UnicodeString& result);    // Reorders combining marks
00711   static uint8_t    getClass(UChar ch);                    // Gets char's combining class
00712   
00713   // Other static utility methods
00714   static void doAppend(const UChar source[], 
00715                uint16_t offset, 
00716                UnicodeString& dest);
00717   static void doInsert(const UChar source[], 
00718                uint16_t offset, 
00719                UnicodeString& dest, 
00720                UTextOffset pos);
00721   // Hangul/Jamo conversion helpers (static; they operate on the UnicodeString passed in).
00722   static void hangulToJamo(UChar ch, 
00723                UnicodeString& result, 
00724                uint16_t decompLimit);
00725   static void jamoAppend(UChar ch, 
00726              uint16_t decompLimit, 
00727              UnicodeString& dest);
00728   static void jamoToHangul(UnicodeString& buffer, 
00729                UTextOffset start);
00730   
00731   //-------------------------------------------------------------------------
00732   // Private data
00733   //-------------------------------------------------------------------------
00734   
00735   EMode         fMode;
00736   int32_t       fOptions;
00737   int16_t    minDecomp;
00738   
00739   // The input text and our position in it
00740   CharacterIterator*  text;
00741   
00742   // A buffer for holding intermediate results
00743   UnicodeString       buffer;
00744   UTextOffset          bufferPos;
00745   UTextOffset          bufferLimit;
00746   UChar             currentChar;
00747   
00748   // Another buffer for use during iterative composition
00749   UnicodeString       explodeBuf;
00750   
00751   enum {
00752     EMPTY = -1,
00753     STR_INDEX_SHIFT = 2, //Must agree with the constants used in NormalizerBuilder
00754     STR_LENGTH_MASK = 0x0003
00755   };
00756   // Constants named for Hangul/Jamo handling; JAMO_NCOUNT is computed as JAMO_VCOUNT * JAMO_TCOUNT (21 * 28 = 588).
00757   enum {
00758     HANGUL_BASE = 0xac00,
00759     HANGUL_LIMIT = 0xd7a4,
00760     JAMO_LBASE = 0x1100,
00761     JAMO_VBASE = 0x1161,
00762     JAMO_TBASE = 0x11a7,
00763     JAMO_LCOUNT = 19,
00764     JAMO_VCOUNT = 21,
00765     JAMO_TCOUNT = 28,
00766     JAMO_NCOUNT = JAMO_VCOUNT * JAMO_TCOUNT
00767   };
00768   // Grants ComposedCharIter access to the private members declared above.
00769   friend class ComposedCharIter;
00770 };
00771 
00772 // Inequality, expressed as the logical negation of Normalizer::operator==.
00773 inline UBool Normalizer::operator!=(const Normalizer& other) const
00774 { return !this->operator==(other); }
00775 
00776 #endif // NORMLZR_H

Generated at Mon Jun 5 12:53:04 2000 for ICU1.5 by doxygen 1.0.0 written by Dimitri van Heesch, © 1997-1999