Main Page   Class Hierarchy   Alphabetical List   Compound List   File List   Compound Members   File Members  

tblcoll.h

Go to the documentation of this file.
00001 /*
00002 * Copyright © {1996-1999}, International Business Machines Corporation and others. All Rights Reserved.
00003 *******************************************************************************
00004 *
00005 * File tblcoll.h
00006 *
00007 * Created by: Helena Shih
00008 *
00009 * Modification History:
00010 *
00011 *  Date        Name        Description
00012 *  2/5/97      aliu        Added streamIn and streamOut methods.  Added
00013 *                          constructor which reads RuleBasedCollator object from
00014 *                          a binary file.  Added writeToFile method which streams
00015 *                          RuleBasedCollator out to a binary file.  The streamIn
00016 *                          and streamOut methods use istream and ostream objects
00017 *                          in binary mode.
00018 *  2/12/97     aliu        Modified to use TableCollationData sub-object to
00019 *                          hold invariant data.
00020 *  2/13/97     aliu        Moved several methods into this class from Collation.
00021 *                          Added a private RuleBasedCollator(Locale&) constructor,
00022 *                          to be used by Collator::createDefault().  General
00023 *                          clean up.
00024 *  2/20/97     helena      Added clone, operator==, operator!=, operator=, and copy
00025 *                          constructor and getDynamicClassID.
00026 *  3/5/97      aliu        Modified constructFromFile() to add parameter
00027 *                          specifying whether or not binary loading is to be
00028 *                          attempted.  This is required for dynamic rule loading.
00029 * 05/07/97     helena      Added memory allocation error detection.
00030 *  6/17/97     helena      Added IDENTICAL strength for compare, changed getRules to 
00031 *                          use MergeCollation::getPattern.
00032 *  6/20/97     helena      Java class name change.
00033 *  8/18/97     helena      Added internal API documentation.
00034 * 09/03/97     helena      Added createCollationKeyValues().
00035 * 02/10/98     damiba      Added compare with "length" parameter
00036 * 08/05/98     erm         Synched with 1.2 version of RuleBasedCollator.java
00037 * 04/23/99     stephen     Removed EDecompositionMode, merged with
00038 *                          Normalizer::EMode
00039 * 06/14/99     stephen     Removed kResourceBundleSuffix
00040 * 11/02/99     helena      Collator performance enhancements.  Eliminates the 
00041 *                          UnicodeString construction and special case for NO_OP.
00042 * 11/23/99     srl         More performance enhancements. Updates to NormalizerIterator
00043 *                          internal state management.
00044 * 12/15/99     aliu        Update to support Thai collation.  Move NormalizerIterator
00045 *                          to implementation file.
00046 *******************************************************************************
00047 */
00048 
00049 #ifndef TBLCOLL_H
00050 #define TBLCOLL_H
00051 
00052 #include "unicode/utypes.h"
00053 #include "unicode/coll.h"
00054 #include "unicode/chariter.h"
00055 #include "unicode/unistr.h"
00056 #include "unicode/sortkey.h"
00057 #include "unicode/normlzr.h"
00058 
// Forward declarations for types named by the RuleBasedCollator declaration
// below. In that declaration VectorOfPToContractElement, VectorOfInt,
// MergeCollation, CollationElementIterator, NormalizerIterator and
// TableCollationData appear only as pointer/return types, and
// RuleBasedCollatorStreamer only as a friend, so an incomplete type suffices.
// Collator is the exception: it is the base class, so its full definition is
// required (presumably supplied by unicode/coll.h, included above -- confirm).
// NOTE(review): VectorOfPToContractTable and VectorOfPToExpandTable are not
// referenced anywhere in the class declaration shown in this file.
00059 class VectorOfPToContractElement;
00060 class VectorOfInt;
00061 class VectorOfPToContractTable;
00062 class VectorOfPToExpandTable;
00063 class MergeCollation;
00064 class CollationElementIterator;
00065 class RuleBasedCollatorStreamer;
00066 class NormalizerIterator; // see tblcoll.cpp
00067 class Collator;
00068 class TableCollationData;
00069 
// RuleBasedCollator: publicly derived from Collator and exported through the
// U_I18N_API macro. This listing shows declarations only; apart from the two
// class-ID accessors, every member is defined outside the class body (two of
// them inline at the end of this file, the rest elsewhere).
// Gaps in the left-hand listing numbers are where the generator removed the
// original documentation comments.
00351 class U_I18N_API RuleBasedCollator : public Collator 
00352 {
00353 public: 
00354 
00355   // constructor/destructor
        // Four public constructors: each takes the rule text and a UErrorCode&,
        // optionally with an explicit collation strength and/or a
        // Normalizer::EMode decomposition mode.
00364         RuleBasedCollator(const UnicodeString& rules,
00365                         UErrorCode& status);
00366 
00367   RuleBasedCollator(  const   UnicodeString&  rules,
00368               ECollationStrength collationStrength,
00369               UErrorCode&      status);
00370 
00371   RuleBasedCollator(  const   UnicodeString&  rules,
00372               Normalizer::EMode decompositionMode,
00373               UErrorCode&      status);
00374 
00375   RuleBasedCollator(  const   UnicodeString&  rules,
00376               ECollationStrength collationStrength,
00377               Normalizer::EMode  decompositionMode,
00378               UErrorCode&      status);
00379 
00383         virtual ~RuleBasedCollator();
00384 
00385 
        // Copy construction and copy assignment from another RuleBasedCollator.
00389         RuleBasedCollator(const RuleBasedCollator& other);
00390 
00395         RuleBasedCollator& operator=(const RuleBasedCollator& other);
00396     
        // Comparison operators take the base Collator type. operator!= is
        // defined inline after the class as the negation of operator==.
00401   virtual UBool                  operator==(const Collator& other) const;
00402 
00407   virtual UBool                  operator!=(const Collator& other) const;
00408 
        // Returns a Collator*. NOTE(review): presumably a newly allocated copy
        // owned by the caller -- confirm against the implementation.
00414   virtual Collator*               clone(void) const;
00415 
        // Iterator factories: one for a UnicodeString source, one for a
        // CharacterIterator source.
00425         virtual CollationElementIterator* createCollationElementIterator(const UnicodeString& source) const;
00426 
00437   virtual CollationElementIterator*       createCollationElementIterator(const CharacterIterator& source) const;
00438 
        // compare() overloads: two UnicodeStrings; two UnicodeStrings plus a
        // length argument; two UChar buffers with explicit lengths.
00452   virtual     EComparisonResult   compare(    const   UnicodeString&  source, 
00453                           const   UnicodeString&  target) const;
00454         
00455         
00470   virtual     EComparisonResult   compare(    const   UnicodeString&  source, 
00471                           const   UnicodeString&  target,
00472                           int32_t length) const;
00473 
00503   virtual EComparisonResult   compare(    const   UChar* source, 
00504                       int32_t sourceLength,
00505                       const   UChar*  target,
00506                       int32_t targetLength) const ;
00507 
        // Same signature as the UChar compare() above; how its behaviour
        // differs is not visible in this header.
00508   virtual EComparisonResult   compareEx(    const   UChar* source, 
00509                       int32_t sourceLength,
00510                       const   UChar*  target,
00511                       int32_t targetLength) const ;
00512 
        // Collation-key producers: each fills the CollationKey passed by
        // reference and returns a CollationKey& (presumably that same object
        // -- confirm).
00524   virtual     CollationKey&       getCollationKey(    const   UnicodeString&  source,
00525                               CollationKey&   key,
00526                               UErrorCode&  status) const;
00527 
00539   virtual CollationKey&       getCollationKey(const UChar *source,
00540                                               int32_t sourceLength,
00541                                               CollationKey&       key,
00542                                               UErrorCode&      status) const;
00543 
        // Same signature as the UChar getCollationKey() above; the difference
        // in behaviour is not visible in this header.
00544   virtual CollationKey&       getCollationKeyEx(const UChar *source,
00545                                               int32_t sourceLength,
00546                                               CollationKey&       key,
00547                                               UErrorCode&      status) const;
00548 
00554   virtual     int32_t             hashCode(void) const;
00555 
        // Const accessor returning the rules by const reference (non-virtual).
00562   const       UnicodeString&      getRules(void) const;
00563 
00575         int32_t getMaxExpansion(int32_t order) const;
00576 
        // Class identification: the dynamic ID simply forwards to the static
        // ID, which is the address of the private static member fgClassID.
00587   virtual UClassID getDynamicClassID(void) const
00588     { return RuleBasedCollator::getStaticClassID(); }
00589 
00590 
00601   static UClassID getStaticClassID(void) { return (UClassID)&fgClassID; }
00602 
00603 
        // Non-const. Returns a byte pointer and takes `length` by reference.
        // NOTE(review): ownership of the returned buffer is not stated in this
        // header -- confirm who frees it.
00613   uint8_t *cloneRuleData(int32_t &length, UErrorCode &status);
00614 
        // Non-const overload returning the rules by value, selected by a
        // UColRuleOption argument.
00620         UnicodeString getRules(UColRuleOption delta);
00621 
00622   /* New APIs for 1.7. Not yet implemented */
00623 
        // NOTE(review): getAttribute, safeClone and the iterator-based compare
        // below are declared non-const, unlike the const accessors and
        // compare() overloads above -- confirm this is intended.
00631 virtual void setAttribute(UColAttribute attr, UColAttributeValue value, UErrorCode &status);
00632 
00640 virtual UColAttributeValue getAttribute(UColAttribute attr, UErrorCode &status);
00641 
00647 virtual Collator* safeClone(void);
00648 
00649 
00661 virtual EComparisonResult compare(ForwardCharacterIterator &source,
00662                                                                  ForwardCharacterIterator &target);
00663 
        // Sort-key overloads writing through the uint8_t* `result`;
        // `resultLength` is presumably the capacity of that buffer and the
        // return value presumably the key length -- confirm.
00672   virtual int32_t       getSortKey(const   UnicodeString&  source,
00673                                                   uint8_t *result,
00674                                                   int32_t resultLength) const;
00675 
00686   virtual int32_t       getSortKey(const   UChar *source,
00687                                                   int32_t sourceLength,
00688                                                   uint8_t *result,
00689                                                   int32_t resultLength) const;
00690 
00691 
00692  /*****************************************************************************
00693  * PRIVATE
00694  *****************************************************************************/
00695 private:
        // Its address is what getStaticClassID() returns.
00696   static      char                fgClassID;
00697 
00698         // Streamer used to read/write binary collation data files.
00699   friend        class                RuleBasedCollatorStreamer;
00700 
00701   // Used to iterate over collation elements in a character source.
00702   friend      class               CollationElementIterator;
00703         
00704   // Collator ONLY needs access to RuleBasedCollator(const Locale&, UErrorCode&)
00705   friend class Collator;
00706         
00707 // This is just temporary, for prototyping.
        // NOTE(review): the friend functions below name UCollator, collIterate,
        // incrementalContext and UCollationResult, none of which are declared
        // in this header; they must already be visible through one of the
        // includes -- confirm which.
00708   friend int32_t ucol_getNextCE(const UCollator *coll, collIterate *source, UErrorCode *status);
00709   friend int32_t ucol_getIncrementalCE(const UCollator *coll, incrementalContext *source, UErrorCode *status); 
00710   friend int32_t getComplicatedCE(const UCollator *coll, collIterate *source, UErrorCode *status);
00711   friend int32_t ucol_calcSortKey(const    UCollator    *coll,
00712         const    UChar        *source,
00713         int32_t        sourceLength,
00714         uint8_t        **result,
00715         int32_t        resultLength,
00716         UBool allocatePrimary);
00717   friend UCollationResult ucol_strcoll(    const    UCollator    *coll,
00718         const    UChar        *source,
00719         int32_t            sourceLength,
00720         const    UChar        *target,
00721         int32_t            targetLength);
00722   friend int32_t ucol_getSortKeySize(const UCollator *coll, 
00723         collIterate *s, 
00724         int32_t currentSize, 
00725         UColAttributeValue strength, 
00726         int32_t len);
00727   friend void *ucol_getABuffer(const UCollator *coll, uint32_t size);
00728 
00729 
00730 
00731 
        // The default constructor is private, so only members and the friends
        // declared above can default-construct a RuleBasedCollator.
00734   RuleBasedCollator();
00735 
        // Private non-const helpers. Judging by their names they are used
        // while building the collation tables from a rule string -- confirm
        // in tblcoll.cpp.
00741   int32_t                addExpansion(int32_t anOrder,
00742                              const UnicodeString &expandChars);
00748   void                build(  const   UnicodeString&  rules,
00749                   UErrorCode&      success);
00750 
00754   void                addComposedChars(void);
00755 
00759   void                commit(void);
00766   int32_t             increment(  Collator::ECollationStrength    s, 
00767                   int32_t                         lastOrder);
00774   void                addOrder(   UChar        ch, 
00775                   int32_t        anOrder, 
00776                   UErrorCode&  status);
00784   void                addExpandOrder(const    UnicodeString&          groupChars, 
00785                      const    UnicodeString&          expChars, 
00786                      int32_t                            anOrder,
00787                      UErrorCode&                       status);
        // Three-argument overload: defined inline after the class, where it
        // forwards to the four-argument overload with fwd = TRUE.
00794   void                addContractOrder(const  UnicodeString&          groupChars, 
00795                        int32_t                        anOrder,
00796                        UErrorCode&                     status);
00804   void                addContractOrder(const  UnicodeString&          groupChars, 
00805                        int32_t                        anOrder,
00806                        UBool                            fwd,
00807                        UErrorCode&                     status);
        // Private const lookups.
00815   int32_t                getContractOrder(const    UnicodeString            &groupChars) const;
00823   VectorOfPToContractElement* 
00824   getContractValues(UChar     ch) const;
00832   VectorOfPToContractElement* 
00833   getContractValues(int32_t     index) const;
00841   VectorOfInt*        getExpandValueList(int32_t     order) const;
00842 
00848   int32_t                getCharOrder(UChar ch) const;
00849 
00856   static        int32_t             getEntry(   VectorOfPToContractElement*     list, 
00857                         const   UnicodeString&          name,
00858                         UBool                    fwd);
00859 
00869   UBool              writeToFile(const char* fileName) const; // True on success
00870   /*  UBool              prepareForBundle() const;*/
00871 
00878   void                addToCache(         const UnicodeString& key);
00879 
        // Private locale-based constructor; per the friend comment above, this
        // is the constructor Collator needs access to.
00888   RuleBasedCollator(      const Locale& desiredLocale,
00889               UErrorCode& status);
        // constructFrom* helpers, one per source (rules, file, cache, bundle).
00905   void                constructFromRules( const UnicodeString& rules,
00906                       UErrorCode& status);
00907   void                constructFromFile(  const Locale&           locale,
00908                       const UnicodeString&    localeFileName,
00909                       UBool                  tryBinaryFile,
00910                       UErrorCode&              status);
00911   void                constructFromFile(  const char* fileName,
00912                       UErrorCode& status);
00913   void                constructFromCache( const UnicodeString& key,
00914                       UErrorCode& status);
        // NOTE(review): the parameter is a Locale but is named `fileName`,
        // which is misleading; parameter names in a declaration do not affect
        // callers, so renaming would be safe -- confirm against tblcoll.cpp.
00915   const char*         constructFromBundle(const Locale& fileName,
00916                                           UErrorCode& status);
00917 
00918 
00919   //--------------------------------------------------------------------------
00920   // Internal Static Utility Methods
00928   static  char*               createPathName( const UnicodeString&    prefix,
00929                           const UnicodeString&    name,
00930                           const UnicodeString&    suffix);
00931 
00932   static UBool initMutex(void);
00933 
        // NOTE(review): `status` is taken BY VALUE here, whereas every other
        // member in this class that takes a UErrorCode (e.g. nextContractChar
        // below) takes it by reference. A by-value status cannot carry an
        // error back to the caller. Changing it would alter the signature
        // shared with the out-of-view definition, so it is only flagged here
        // -- confirm whether this is intentional.
00934   int32_t getStrengthOrder(NormalizerIterator* cursor, 
00935                                     UErrorCode status) const;
00936   VectorOfInt* makeReorderedBuffer(NormalizerIterator* cursor,
00937                                    UChar colFirst,
00938                                    int32_t lastValue,
00939                                    VectorOfInt* lastExpansion) const;
00940   int32_t strengthOrder(int32_t value) const ;
00941   int32_t nextContractChar(NormalizerIterator *cursor, 
00942                            UChar ch,
00943                            UErrorCode& status) const;
00949   static  void                chopLocale(UnicodeString&   localeName);
00950 
00951   //--------------------------------------------------------------------------
00952   // Constants
        // Declarations only: no initializers appear here, so the values are
        // defined elsewhere and cannot be read from this header.
00953 
00954   static  const   int32_t             UNMAPPED;
00955   static  const   int32_t             CHARINDEX;  // need look up in .commit()
00956   static  const   int32_t             EXPANDCHARINDEX; // Expand index follows
00957   static  const   int32_t             CONTRACTCHARINDEX;  // contract indexes follow
00958 
00959   static  const   int32_t             PRIMARYORDERINCREMENT;
00960   static  const   int32_t             MAXIGNORABLE;
00961   static  const   int32_t             SECONDARYORDERINCREMENT;
00962   static  const   int32_t             TERTIARYORDERINCREMENT;
00963   static  const   int32_t             PRIMARYORDERMASK;
00964   static  const   int32_t             SECONDARYORDERMASK;
00965   static  const   int32_t             TERTIARYORDERMASK;
00966   static  const   int32_t             SECONDARYRESETMASK;
00967   static  const   int32_t             IGNORABLEMASK;
00968   static  const   int32_t             PRIMARYDIFFERENCEONLY;
00969   static  const   int32_t             SECONDARYDIFFERENCEONLY;
00970   static  const   int32_t             PRIMARYORDERSHIFT;
00971   static  const   int32_t             SECONDARYORDERSHIFT;
00972   static  const   int32_t             SORTKEYOFFSET;
00973   static  const   int32_t             CONTRACTCHAROVERFLOW;
00974 
00975   static  const   int32_t             COLELEMENTSTART;
00976   static  const   int32_t             PRIMARYLOWZEROMASK;
00977   static  const   int32_t             RESETSECONDARYTERTIARY;
00978   static  const   int32_t             RESETTERTIARY;
00979 
00980   static  const   int32_t             IGNORABLE;
00981   static  const   int32_t             PRIMIGNORABLE;
00982   static  const   int32_t             SECIGNORABLE;
00983   static  const   int32_t             TERIGNORABLE;
00984 
00985 
00986   static const int16_t                FILEID;
00987 
        // Unlike the constants above, DEFAULTRULES and the three statics after
        // kFilenameSuffix are not const, i.e. they are mutable class-wide
        // state. NOTE(review): collMutex presumably guards them -- confirm.
00988   static       UnicodeString      DEFAULTRULES;
00989 
00990   static  const char*             kFilenameSuffix;
00991 
00992   static UBool isMutexInited;
00993   static UMTX collMutex;
00994   static UChar cacheKey;
00995 
00996         //--------------------------------------------------------------------------
00997         // Data Members
        // Per-instance state. Several members are raw pointers (mPattern,
        // cursor1, cursor2, data, fSomeMemory, fSizes); who owns them is not
        // visible here. NOTE(review): `dataIsOwned` presumably records whether
        // `data` is owned by this object -- confirm in tblcoll.cpp.
00998 
00999   UBool              isOverIgnore;
01000   UChar             lastChar;
01001   MergeCollation*     mPattern;
01002   UnicodeString       sbuffer;
01003   UnicodeString       tbuffer;
01004   UnicodeString       key;
01005   NormalizerIterator  *cursor1;
01006   NormalizerIterator  *cursor2;
01007   UBool              dataIsOwned;
01008   TableCollationData* data;
01009   Normalizer::EMode fDefaultDecomp;
01010   void **fSomeMemory;
01011   int32_t *fSizes;
01012   int32_t fAvailableMemory;
01013   int32_t fUsedMemory;
01014 
        // Private allocation helper. NOTE(review): presumably backed by the
        // fSomeMemory / fSizes / f*Memory fields above -- confirm.
01015   void *getSomeMemory(int32_t size);
01016 
01017 };
01018 
01019 
// Inequality is implemented as the logical negation of operator==.
// operator== is declared virtual in the class, so the comparison performed
// is whichever override applies to the dynamic type of *this.
01020 inline UBool
01021 RuleBasedCollator::operator!=(const Collator& other) const
01022 {
01023   return !(*this == other);
01024 }
01025 
// Three-argument convenience overload: forwards groupChars, anOrder and
// status unchanged to the four-argument addContractOrder, passing TRUE for
// its `fwd` parameter.
01026 inline void
01027 RuleBasedCollator::addContractOrder(const UnicodeString &groupChars,
01028                     int32_t                anOrder,
01029                     UErrorCode            &status)
01030 {
01031   addContractOrder(groupChars, anOrder, TRUE, status);
01032 }
01033 
01034 #endif

Generated at Fri Dec 15 12:12:35 2000 for ICU 1.7 by doxygen 1.2.3 written by Dimitri van Heesch, © 1997-2000