Main Page   Class Hierarchy   Compound List   File List   Header Files   Sources   Compound Members   File Members  

tblcoll.h

00001 /*
00002 * Copyright © {1996-1999}, International Business Machines Corporation and others. All Rights Reserved.
00003 *******************************************************************************
00004 *
00005 * File tblcoll.h
00006 *
00007 * Created by: Helena Shih
00008 *
00009 * Modification History:
00010 *
00011 *  Date        Name        Description
00012 *  2/5/97      aliu        Added streamIn and streamOut methods.  Added
00013 *                          constructor which reads RuleBasedCollator object from
00014 *                          a binary file.  Added writeToFile method which streams
00015 *                          RuleBasedCollator out to a binary file.  The streamIn
00016 *                          and streamOut methods use istream and ostream objects
00017 *                          in binary mode.
00018 *  2/12/97     aliu        Modified to use TableCollationData sub-object to
00019 *                          hold invariant data.
00020 *  2/13/97     aliu        Moved several methods into this class from Collation.
00021 *                          Added a private RuleBasedCollator(Locale&) constructor,
00022 *                          to be used by Collator::createDefault().  General
00023 *                          clean up.
00024 *  2/20/97     helena      Added clone, operator==, operator!=, operator=, and copy
00025 *                          constructor and getDynamicClassID.
00026 *  3/5/97      aliu        Modified constructFromFile() to add parameter
00027 *                          specifying whether or not binary loading is to be
00028 *                          attempted.  This is required for dynamic rule loading.
00029 * 05/07/97     helena      Added memory allocation error detection.
00030 *  6/17/97     helena      Added IDENTICAL strength for compare, changed getRules to 
00031 *                          use MergeCollation::getPattern.
00032 *  6/20/97     helena      Java class name change.
00033 *  8/18/97     helena      Added internal API documentation.
00034 * 09/03/97     helena      Added createCollationKeyValues().
00035 * 02/10/98     damiba      Added compare with "length" parameter
00036 * 08/05/98     erm         Synched with 1.2 version of RuleBasedCollator.java
00037 * 04/23/99     stephen     Removed EDecompositionMode, merged with
00038 *                          Normalizer::EMode
00039 * 06/14/99     stephen     Removed kResourceBundleSuffix
00040 * 11/02/99     helena      Collator performance enhancements.  Eliminates the 
00041 *                          UnicodeString construction and special case for NO_OP.
00042 * 11/23/99     srl         More performance enhancements. Updates to NormalizerIterator
00043 *                          internal state management.
00044 * 12/15/99     aliu        Update to support Thai collation.  Move NormalizerIterator
00045 *                          to implementation file.
00046 *******************************************************************************
00047 */
00048 
00049 #ifndef TBLCOLL_H
00050 #define TBLCOLL_H
00051 
00052 #include "unicode/utypes.h"
00053 #include "unicode/coll.h"
00054 #include "unicode/chariter.h"
00055 #include "unicode/unistr.h"
00056 #include "unicode/sortkey.h"
00057 #include "unicode/normlzr.h"
00058 
00059 class VectorOfPToContractElement; // used below only through pointers (getContractValues, getEntry)
00060 class VectorOfInt; // used below only through pointers (getExpandValueList, makeReorderedBuffer)
00061 class VectorOfPToContractTable; // NOTE(review): not referenced anywhere else in this header as shown
00062 class VectorOfPToExpandTable; // NOTE(review): not referenced anywhere else in this header as shown
00063 class MergeCollation; // used below only as the pointer member mPattern
00064 class CollationElementIterator; // returned by pointer and declared a friend of RuleBasedCollator
00065 class RuleBasedCollatorStreamer; // declared a friend of RuleBasedCollator
00066 class NormalizerIterator; // see tblcoll.cpp
00067 class Collator; // base class of RuleBasedCollator; NOTE(review): presumably already fully defined by unicode/coll.h, which would make this declaration redundant - confirm
00068 class TableCollationData; // used below as the pointer member data and declared a friend
00069 
00319 class U_I18N_API RuleBasedCollator : public Collator 
00320 {
        // RuleBasedCollator is a Collator subclass constructed from a rule
        // string.  This header only declares the interface: apart from the
        // two class-ID accessors defined in the class body and the two inline
        // functions that follow the class, every definition lives outside
        // this header.
        //
        // The public section covers construction/copying, comparison,
        // collation-key creation, rule access and class identification.  The
        // private section holds the helpers, constants and data members,
        // which are reachable only from this class and the four friend
        // classes declared below.
00321 public: 
00322 
00323   // constructor/destructor
        // Four public constructors.  Each takes the rule text and a
        // UErrorCode& out-parameter; the overloads differ only in whether a
        // collation strength and/or a Normalizer::EMode decomposition mode is
        // supplied in addition.
00332         RuleBasedCollator(const UnicodeString& rules,
00333                         UErrorCode& status);
00334 
00335   RuleBasedCollator(  const   UnicodeString&  rules,
00336               ECollationStrength collationStrength,
00337               UErrorCode&      status);
00338 
00339   RuleBasedCollator(  const   UnicodeString&  rules,
00340               Normalizer::EMode decompositionMode,
00341               UErrorCode&      status);
00342 
00343   RuleBasedCollator(  const   UnicodeString&  rules,
00344               ECollationStrength collationStrength,
00345               Normalizer::EMode  decompositionMode,
00346               UErrorCode&      status);
00347 
        // The destructor, copy constructor and copy assignment operator are
        // all user-declared; the destructor is virtual.
00351         virtual ~RuleBasedCollator();
00352 
00353 
00357         RuleBasedCollator(const RuleBasedCollator& other);
00358 
00363         RuleBasedCollator& operator=(const RuleBasedCollator& other);
00364     
        // Equality is tested against any Collator, not only another
        // RuleBasedCollator.  operator!= is defined inline after the class as
        // the negation of operator==.
00369   virtual UBool                  operator==(const Collator& other) const;
00370 
00375   virtual UBool                  operator!=(const Collator& other) const;
00376 
        // Returns a Collator*.  NOTE(review): the caller presumably owns the
        // returned object - confirm against the implementation.
00382   virtual Collator*               clone(void) const;
00383 
        // Two overloads that return a CollationElementIterator* for either a
        // UnicodeString or a CharacterIterator source.
        // NOTE(review): ownership of the returned iterator is not visible
        // here - confirm.
00393         virtual CollationElementIterator* createCollationElementIterator(const UnicodeString& source) const;
00394 
00405   virtual CollationElementIterator*       createCollationElementIterator(const CharacterIterator& source) const;
00406 
        // Three compare overloads: two UnicodeStrings; two UnicodeStrings
        // plus an int32_t length; and two UChar buffers with explicit
        // lengths.  All return an EComparisonResult and are const.
00420   virtual     EComparisonResult   compare(    const   UnicodeString&  source, 
00421                           const   UnicodeString&  target) const;
00422         
00423         
00438   virtual     EComparisonResult   compare(    const   UnicodeString&  source, 
00439                           const   UnicodeString&  target,
00440                           int32_t length) const;
00441 
00471   virtual EComparisonResult   compare(    const   UChar* source, 
00472                       int32_t sourceLength,
00473                       const   UChar*  target,
00474                       int32_t targetLength) const ;
00475 
        // Two getCollationKey overloads, taking the source either as a
        // UnicodeString or as a UChar buffer with sourceLength.  Both take a
        // caller-supplied CollationKey& and a UErrorCode&, and return a
        // CollationKey& (presumably the same `key` object - confirm).
00487   virtual     CollationKey&       getCollationKey(    const   UnicodeString&  source,
00488                               CollationKey&   key,
00489                               UErrorCode&  status) const;
00490 
00502   virtual CollationKey&       getCollationKey(const UChar *source,
00503                                               int32_t sourceLength,
00504                                               CollationKey&       key,
00505                                               UErrorCode&      status) const;
00506 
00512   virtual     int32_t             hashCode(void) const;
00513 
        // Non-virtual accessor returning the rules as a const reference.
00520   const       UnicodeString&      getRules(void) const;
00521 
00533         int32_t getMaxExpansion(int32_t order) const;
00534 
        // Class identification: the dynamic ID simply forwards to the static
        // ID, which is the address of the private static member fgClassID
        // cast to UClassID.
00545   virtual UClassID getDynamicClassID(void) const
00546     { return RuleBasedCollator::getStaticClassID(); }
00547 
00548 
00559   static UClassID getStaticClassID(void) { return (UClassID)&fgClassID; }
00560 
00561 
        // Non-const member returning a uint8_t* and taking `length` by
        // non-const reference.  NOTE(review): presumably returns a newly
        // allocated copy of the rule data that the caller must free, with
        // its size written to `length` - confirm against the implementation.
00571   uint8_t *cloneRuleData(int32_t &length, UErrorCode &status);
00572 
00573  /*****************************************************************************
00574  * PRIVATE
00575  *****************************************************************************/
00576 private:
        // Only the address of fgClassID is used (see getStaticClassID).
00577   static      char                fgClassID;
00578 
00579         // Streamer used to read/write binary collation data files.
00580   friend        class                RuleBasedCollatorStreamer;
00581 
00582   // Used to iterate over collation elements in a character source.
00583   friend      class               CollationElementIterator;
00584         
00585   // Collator ONLY needs access to RuleBasedCollator(const Locale&, UErrorCode&)
00586   friend class Collator;
00587         
00588   // TableCollationData ONLY needs access to UNMAPPED
00589   friend class TableCollationData;
00590 
00591 
        // Private default constructor: usable only by this class and its
        // friends.
00594   RuleBasedCollator();
00595 
        // Private helpers.  Judging by their names these build the collation
        // tables from the rule text (build, addOrder, addExpandOrder,
        // addContractOrder, commit, ...); their definitions are not in this
        // header, so confirm details in the implementation file.
00601   int32_t                addExpansion(int32_t anOrder,
00602                              const UnicodeString &expandChars);
00608   void                build(  const   UnicodeString&  rules,
00609                   UErrorCode&      success);
00610 
00614   void                addComposedChars(void);
00615 
00619   void                commit(void);
00626   int32_t             increment(  Collator::ECollationStrength    s, 
00627                   int32_t                         lastOrder);
00634   void                addOrder(   UChar        ch, 
00635                   int32_t        anOrder, 
00636                   UErrorCode&  status);
00644   void                addExpandOrder(const    UnicodeString&          groupChars, 
00645                      const    UnicodeString&          expChars, 
00646                      int32_t                            anOrder,
00647                      UErrorCode&                       status);
        // The three-argument addContractOrder is defined inline after the
        // class; it forwards to the four-argument overload with fwd = TRUE.
00654   void                addContractOrder(const  UnicodeString&          groupChars, 
00655                        int32_t                        anOrder,
00656                        UErrorCode&                     status);
00664   void                addContractOrder(const  UnicodeString&          groupChars, 
00665                        int32_t                        anOrder,
00666                        UBool                            fwd,
00667                        UErrorCode&                     status);
00675   int32_t                getContractOrder(const    UnicodeString            &groupChars) const;
        // getContractValues is overloaded on a UChar and on an int32_t index.
00683   VectorOfPToContractElement* 
00684   getContractValues(UChar     ch) const;
00692   VectorOfPToContractElement* 
00693   getContractValues(int32_t     index) const;
00701   VectorOfInt*        getExpandValueList(int32_t     order) const;
00702 
00708   int32_t                getCharOrder(UChar ch) const;
00709 
00716   static        int32_t             getEntry(   VectorOfPToContractElement*     list, 
00717                         const   UnicodeString&          name,
00718                         UBool                    fwd);
00719 
00729   UBool              writeToFile(const char* fileName) const; // True on success
00730   /*  UBool              prepareForBundle() const;*/
00731 
00738   void                addToCache(         const UnicodeString& key);
00739 
        // Private constructor taking a Locale; per the friend comment above,
        // this is the constructor Collator needs access to.
00748   RuleBasedCollator(      const Locale& desiredLocale,
00749               UErrorCode& status);
        // constructFrom* family: separate entry points taking rules, a
        // locale plus file name, a bare file name, a cache key, or a Locale
        // for the bundle case.
        // NOTE(review): constructFromBundle names its Locale parameter
        // `fileName` - presumably a leftover name; confirm.
00765   void                constructFromRules( const UnicodeString& rules,
00766                       UErrorCode& status);
00767   void                constructFromFile(  const Locale&           locale,
00768                       const UnicodeString&    localeFileName,
00769                       UBool                  tryBinaryFile,
00770                       UErrorCode&              status);
00771   void                constructFromFile(  const char* fileName,
00772                       UErrorCode& status);
00773   void                constructFromCache( const UnicodeString& key,
00774                       UErrorCode& status);
00775   const char*         constructFromBundle(const Locale& fileName,
00776                                           UErrorCode& status);
00777 
00778 
00779   //--------------------------------------------------------------------------
00780   // Internal Static Utility Methods
        // NOTE(review): despite the heading, only createPathName and
        // chopLocale in this section are static; getStrengthOrder,
        // makeReorderedBuffer, strengthOrder and nextContractChar are
        // non-static const member functions.
00788   static  char*               createPathName( const UnicodeString&    prefix,
00789                           const UnicodeString&    name,
00790                           const UnicodeString&    suffix);
00791 
        // NOTE(review): `status` is passed BY VALUE here, unlike
        // nextContractChar and the other members, which take UErrorCode&.
        // Any error code set inside getStrengthOrder therefore cannot reach
        // the caller through this parameter - confirm whether that is
        // intended before relying on it.
00792   int32_t getStrengthOrder(NormalizerIterator* cursor, 
00793                                     UErrorCode status) const;
00794   VectorOfInt* makeReorderedBuffer(NormalizerIterator* cursor,
00795                                    UChar colFirst,
00796                                    int32_t lastValue,
00797                                    VectorOfInt* lastExpansion) const;
00798   int32_t strengthOrder(int32_t value) const ;
00799   int32_t nextContractChar(NormalizerIterator *cursor, 
00800                            UChar ch,
00801                            UErrorCode& status) const;
00807   static  void                chopLocale(UnicodeString&   localeName);
00808 
00809   //--------------------------------------------------------------------------
00810   // Constants
        // All constants below are only declared here; their values are
        // defined elsewhere and are not visible in this header.
00811 
00812   static  const   int32_t             UNMAPPED;
00813   static  const   int32_t             CHARINDEX;  // need look up in .commit()
00814   static  const   int32_t             EXPANDCHARINDEX; // Expand index follows
00815   static  const   int32_t             CONTRACTCHARINDEX;  // contract indexes follow
00816 
00817   static  const   int32_t             PRIMARYORDERINCREMENT;
00818   static  const   int32_t             MAXIGNORABLE;
00819   static  const   int32_t             SECONDARYORDERINCREMENT;
00820   static  const   int32_t             TERTIARYORDERINCREMENT;
00821   static  const   int32_t             PRIMARYORDERMASK;
00822   static  const   int32_t             SECONDARYORDERMASK;
00823   static  const   int32_t             TERTIARYORDERMASK;
00824   static  const   int32_t             SECONDARYRESETMASK;
00825   static  const   int32_t             IGNORABLEMASK;
00826   static  const   int32_t             PRIMARYDIFFERENCEONLY;
00827   static  const   int32_t             SECONDARYDIFFERENCEONLY;
00828   static  const   int32_t             PRIMARYORDERSHIFT;
00829   static  const   int32_t             SECONDARYORDERSHIFT;
00830   static  const   int32_t             SORTKEYOFFSET;
00831   static  const   int32_t             CONTRACTCHAROVERFLOW;
00832 
00833   static const int16_t                FILEID;
00834 
        // Unlike the constants above, DEFAULTRULES is a non-const static.
00835   static       UnicodeString      DEFAULTRULES;
00836 
00837   static  const char*             kFilenameSuffix;
00838 
00839         //--------------------------------------------------------------------------
00840         // Data Members
00841 
        // cursor1/cursor2 point to NormalizerIterator, which this header only
        // forward-declares (see tblcoll.cpp).
        // NOTE(review): dataIsOwned presumably records whether this object is
        // responsible for releasing `data` - confirm in the implementation.
00842   UBool              isOverIgnore;
00843   UChar             lastChar;
00844   MergeCollation*     mPattern;
00845   UnicodeString       sbuffer;
00846   UnicodeString       tbuffer;
00847   UnicodeString       key;
00848   NormalizerIterator  *cursor1;
00849   NormalizerIterator  *cursor2;
00850   UBool              dataIsOwned;
00851   TableCollationData* data;
00852 };
00853 
00854 inline UBool
00855 RuleBasedCollator::operator!=(const Collator& other) const
00856 {
        // Inequality is the logical negation of the virtual operator==,
        // invoked on this object with the same right-hand operand.
00857   return !(this->operator==(other));
00858 }
00859 
00860 inline void
00861 RuleBasedCollator::addContractOrder(const UnicodeString& groupChars,
00862                                     int32_t              anOrder,
00863                                     UErrorCode&          status)
00864 {
        // Convenience overload: delegates to the four-argument
        // addContractOrder, always passing TRUE for its `fwd` argument.
00865   this->addContractOrder(groupChars, anOrder, TRUE, status);
00866 }
00867 
00868 
00869 
00870 
00871 #endif

Generated at Mon Jun 5 12:53:05 2000 for ICU1.5 by doxygen 1.0.0 written by Dimitri van Heesch, © 1997-1999