Main Page   Class Hierarchy   Alphabetical List   Compound List   File List   Compound Members   File Members  

normlzr.h

Go to the documentation of this file.
00001 /*
00002  ********************************************************************
00003  * COPYRIGHT: 
00004  * Copyright (c) 1996-1999, International Business Machines Corporation and
00005  * others. All Rights Reserved.
00006  ********************************************************************
00007  */
00008 
00009 #ifndef NORMLZR_H
00010 #define NORMLZR_H
00011 
00012 #include "unicode/utypes.h"
00013 #include "unicode/unistr.h"
00014 #include "unicode/chariter.h"
00015 
00016 /* forward declaration */
00017 class ComposedCharIter;
00018 
00114 class U_COMMON_API Normalizer
00115 {
00116   // Public API: mode/option constants, constructors, static helpers, iteration methods, property accessors.
00117  public:
00118   // This tells us what the bits in the "mode" mean.
00119   enum {
00120     COMPAT_BIT         = 1,
00121     DECOMP_BIT         = 2,
00122     COMPOSE_BIT     = 4
00123   };
00124   // DONE is declared in its own anonymous enum, separate from EMode.
00125   // NOTE(review): presumably the value the iteration methods below return
00126   // when there is no character to return -- confirm in the implementation.
00128   enum {
00129       DONE=0xffff
00130   };
00131   // Every EMode value is an OR of the *_BIT constants declared above.
00133   enum EMode {
00134 
00147     NO_OP         = 0,  // no bits set
00148     
00163     COMPOSE         = COMPOSE_BIT,  // == 4
00164 
00179     COMPOSE_COMPAT     = COMPOSE_BIT | COMPAT_BIT,  // == 5
00180 
00195     DECOMP         = DECOMP_BIT,  // == 2
00196 
00211     DECOMP_COMPAT     = DECOMP_BIT | COMPAT_BIT  // == 3
00212   };
00213   // NOTE(review): presumably a flag for the int32_t opt/option/options parameters and setOption() -- confirm.
00215   enum {
00216 
00234     IGNORE_HANGUL     = 0x001
00235   };
00236 
00237   // Constructors
00238   // Three input kinds (UnicodeString, UChar* + length, CharacterIterator), each with and without an options argument.
00249   Normalizer(const UnicodeString& str, 
00250          EMode mode);
00251     
00270   Normalizer(const UnicodeString& str, 
00271          EMode mode, 
00272          int32_t opt);
00273 
00286   Normalizer(const UChar* str,
00287          int32_t length,
00288          EMode mode);
00289 
00306   Normalizer(const UChar* str,
00307          int32_t length,
00308          EMode mode,
00309          int32_t option);
00310 
00322   Normalizer(const CharacterIterator& iter, 
00323          EMode mode);
00324 
00340   Normalizer(const CharacterIterator& iter, 
00341          EMode mode, 
00342          int32_t opt);
00343 
00348   Normalizer(const Normalizer& copy);
00349 
00354   ~Normalizer();
00355 
00356 
00357   //-------------------------------------------------------------------------
00358   // Static utility methods
00359   //-------------------------------------------------------------------------
00360   // All three return void and take a non-const 'result' reference plus a UErrorCode& 'status'.
00381   static void normalize(const UnicodeString& source, 
00382             EMode mode, 
00383             int32_t options,
00384             UnicodeString& result, 
00385             UErrorCode &status);
00386 
00410   static void compose(const UnicodeString& source, 
00411               UBool compat,
00412               int32_t options,
00413               UnicodeString& result, 
00414               UErrorCode &status);
00415 
00442   static void decompose(const UnicodeString& source, 
00443             UBool compat,
00444             int32_t options,
00445             UnicodeString& result, 
00446             UErrorCode &status);
00447 
00448 
00449   //-------------------------------------------------------------------------
00450   // CharacterIterator overrides
00451   //-------------------------------------------------------------------------
00452   // NOTE(review): despite the heading, this class declares no base class and none of the methods below is virtual.
00457   UChar32              current(void) const;
00458 
00464   UChar32              first(void);
00465 
00472   UChar32              last(void);
00473 
00480   UChar32              next(void);
00481 
00488   UChar32              previous(void);
00489 
00507   UChar32              setIndex(UTextOffset index);
00508 
00517   void                reset(void);
00518 
00533   UTextOffset            getIndex(void) const;
00534 
00541   UTextOffset            startIndex(void) const;
00542 
00549   UTextOffset            endIndex(void) const;
00550 
00551 
00557   //  virtual UBool    operator==(const CharacterIterator& that) const;
00558   UBool        operator==(const Normalizer& that) const;
00559   inline UBool        operator!=(const Normalizer& that) const;  // defined after the class body as !operator==
00560 
00566   Normalizer*        clone(void) const;  // NOTE(review): returns a raw pointer; presumably caller-owned -- confirm
00567 
00572   int32_t                hashCode(void) const;
00573 
00574   //-------------------------------------------------------------------------
00575   // Property access methods
00576   //-------------------------------------------------------------------------
00577 
00604   void setMode(EMode newMode);
00605 
00612   EMode getMode(void) const;
00613 
00637   void setOption(int32_t option, 
00638          UBool value);
00639 
00646   UBool getOption(int32_t option) const;
00647 
00653   void setText(const UnicodeString& newText, 
00654            UErrorCode &status);
00655 
00661   void setText(const CharacterIterator& newText, 
00662            UErrorCode &status);
00663 
00669   void setText(const UChar* newText,
00670                     int32_t length,
00671             UErrorCode &status);
00678   void            getText(UnicodeString&  result);  // NOTE(review): getter is not const-qualified
00679 
00685   const UChar*     getText(int32_t&  count);  // NOTE(review): not const-qualified; 'count' is an out-parameter by signature
00686 
00687 private:
00688   // Private utility methods for iteration
00689   // For documentation, see the source code
00690   UChar nextCompose(void);
00691   UChar prevCompose(void);
00692   UChar nextDecomp(void);
00693   UChar prevDecomp(void);
00694   // NOTE(review): these private helpers return UChar, while the public iteration methods return UChar32.
00695   UChar curForward(void);
00696   UChar curBackward(void);
00697 
00698   void    init(CharacterIterator* iter, 
00699          EMode mode, 
00700          int32_t option);
00701   void    initBuffer(void);
00702   void    clearBuffer(void);
00703 
00704   // Utilities used by Compose
00705   static void        bubbleAppend(UnicodeString& target, 
00706                      UChar ch, 
00707                      uint32_t cclass);
00708   static uint32_t     getComposeClass(UChar ch);
00709   static uint16_t    composeLookup(UChar ch);
00710   static uint16_t    composeAction(uint16_t baseIndex, 
00711                       uint16_t comIndex);
00712   static void        explode(UnicodeString& target, 
00713                 uint16_t index);
00714   static UChar    pairExplode(UnicodeString& target, 
00715                     uint16_t action);
00716 
00717   // Utilities used by Decompose
00718   static void        fixCanonical(UnicodeString& result);    // Reorders combining marks
00719   static uint8_t    getClass(UChar ch);                    // Gets char's combining class
00720 
00721   // Other static utility methods
00722   static void doAppend(const UChar source[], 
00723                uint16_t offset, 
00724                UnicodeString& dest);
00725   static void doInsert(const UChar source[], 
00726                uint16_t offset, 
00727                UnicodeString& dest, 
00728                UTextOffset pos);
00729   static uint16_t doReplace(const UChar source[], 
00730                uint16_t offset, 
00731                UnicodeString& dest, 
00732                UTextOffset pos);
00733 
00734   static void hangulToJamo(UChar ch, 
00735                UnicodeString& result, 
00736                uint16_t decompLimit);
00737   static void jamoAppend(UChar ch, 
00738              uint16_t decompLimit, 
00739              UnicodeString& dest);
00740   static void jamoToHangul(UnicodeString& buffer, 
00741                UTextOffset start);
00742 
00743   //-------------------------------------------------------------------------
00744   // Private data
00745   //-------------------------------------------------------------------------
00746 
00747   EMode         fMode;
00748   int32_t       fOptions;
00749   int16_t    minDecomp;
00750 
00751   // The input text and our position in it
00752   CharacterIterator*  text;  // NOTE(review): raw pointer; ownership is not visible from this header
00753 
00754   // A buffer for holding intermediate results
00755   UnicodeString       buffer;
00756   UTextOffset          bufferPos;
00757   UTextOffset          bufferLimit;
00758   UChar             currentChar;
00759 
00760   // Another buffer for use during iterative composition
00761   UnicodeString       explodeBuf;
00762 
00763   enum {
00764     EMPTY = -1,
00765     STR_INDEX_SHIFT = 2, //Must agree with the constants used in NormalizerBuilder
00766     STR_LENGTH_MASK = 0x0003  // selects the two low bits, i.e. the bits below STR_INDEX_SHIFT
00767   };
00768   // Arithmetic check: HANGUL_LIMIT - HANGUL_BASE == JAMO_LCOUNT * JAMO_NCOUNT == 11172.
00769   enum {
00770     HANGUL_BASE = 0xac00,
00771     HANGUL_LIMIT = 0xd7a4,
00772     JAMO_LBASE = 0x1100,
00773     JAMO_VBASE = 0x1161,
00774     JAMO_TBASE = 0x11a7,
00775     JAMO_LCOUNT = 19,
00776     JAMO_VCOUNT = 21,
00777     JAMO_TCOUNT = 28,
00778     JAMO_NCOUNT = JAMO_VCOUNT * JAMO_TCOUNT  // == 588
00779   };
00780 
00781   friend class ComposedCharIter;
00782 };
00783 
00784 inline UBool  // inline definition of the operator!= declared inside class Normalizer
00785 Normalizer::operator!= (const Normalizer& other) const
00786 { return ! operator==(other); }  // inequality is the logical negation of this object's operator==
00787 
00788 #endif // NORMLZR_H

Generated at Fri Dec 15 12:12:33 2000 for ICU 1.7 by doxygen1.2.3 written by Dimitri van Heesch, © 1997-2000