Main Page   Class Hierarchy   Compound List   File List   Header Files   Sources   Compound Members   File Members  

translit.h

00001 /*
00002 * Copyright © {1999}, International Business Machines Corporation and others. All Rights Reserved.
00003 **********************************************************************
00004 *   Date        Name        Description
00005 *   11/17/99    aliu        Creation.
00006 **********************************************************************
00007 */
00008 #ifndef TRANSLIT_H
00009 #define TRANSLIT_H
00010 
00011 #include "unicode/unistr.h"
00012 #include "unicode/parseerr.h"
00013 
00014 class Replaceable;
00015 class UnicodeFilter;
00016 class TransliterationRuleData;
00017 class Hashtable;
00018 class U_I18N_API UVector;
00019 class CompoundTransliterator;
00020 
00224 class U_I18N_API Transliterator { // Abstract base class: handleTransliterate() below is pure virtual.
00225 
00226 public:
00227     // Direction argument accepted by createInstance(); FORWARD is that parameter's default.
00235     enum Direction {
00236         FORWARD,
00237         REVERSE
00238     };
00242     class Position { // Four public int32_t offsets; the constructors are defined inline after the class.
00243     public:
00247         int32_t start;
00248 
00252         int32_t limit;
00253 
00258         int32_t cursor;
00259 
00264         int32_t end;
00265 
00270         Position(int32_t start, int32_t limit); // inline definition sets cursor = start and end = limit
00271 
00276         Position(int32_t start, int32_t limit, int32_t cursor); // inline definition sets end = limit
00277 
00281         Position(int32_t start, int32_t limit,
00282                  int32_t cursor, int32_t end); // all four fields supplied explicitly
00283     };
00284 
00285 private:
00286 
00290     UnicodeString ID; // assigned by the inline setID() defined after the class
00291 
00298     UnicodeFilter* filter; // NOTE(review): presumably the filter passed to the constructor / adoptFilter() - confirm in the .cpp
00299 
00300     int32_t maximumContextLength; // value returned by the inline getMaximumContextLength()
00301 
00324     static Hashtable* cache;
00325 
00329     static UMTX cacheMutex; // NOTE(review): presumably guards 'cache' - confirm in the .cpp
00330 
00340     static UBool cacheInitialized;
00341     // Tagged record: per the member comments, entryType tells which of rbFile / u.prototype / u.data is meaningful.
00355     struct CacheEntry {
00356         enum Type {
00357             RULE_BASED_PLACEHOLDER,
00358             REVERSE_RULE_BASED_PLACEHOLDER,
00359             PROTOTYPE,
00360             RBT_DATA,
00361             NONE // Only used for uninitialized entries
00362         } entryType;
00363         UnicodeString rbFile; // For *PLACEHOLDER
00364         union {
00365             Transliterator* prototype; // For PROTOTYPE
00366             TransliterationRuleData* data; // For RBT_DATA
00367         } u;
00368         CacheEntry();
00369         ~CacheEntry();
00370         void adoptPrototype(Transliterator* adopted); // NOTE(review): name suggests the entry takes ownership of 'adopted' - confirm
00371     };
00372     // NOTE(review): the RB_* strings below look like resource-bundle keys/prefixes; their values are not visible in this header.
00378     static const char* RB_DISPLAY_NAME_PREFIX;
00379 
00385     static const char* RB_SCRIPT_DISPLAY_NAME_PREFIX;
00386 
00393     static const char* RB_DISPLAY_NAME_PATTERN;
00394 
00401     static const char* RB_RULE_BASED_IDS;
00402 
00406     static const char* RB_RULE;
00407 
00408 protected:
00409     // The constructor, copy constructor and assignment operator are protected, so ordinary clients cannot call them.
00418     Transliterator(const UnicodeString& ID, UnicodeFilter* adoptedFilter);
00419 
00423     Transliterator(const Transliterator&);
00424 
00428     Transliterator& operator=(const Transliterator&);
00429 
00430 public:
00431 
00436     virtual ~Transliterator();
00437 
00451     virtual Transliterator* clone() const { return 0; } // base implementation returns a null pointer
00452 
00471     virtual int32_t transliterate(Replaceable& text,
00472                                   int32_t start, int32_t limit) const;
00473 
00479     virtual void transliterate(Replaceable& text) const;
00480     // The next three overloads take a Position and a UErrorCode; they differ only in the inserted text (UnicodeString, one UChar, or none).
00543     virtual void transliterate(Replaceable& text, Position& index,
00544                                const UnicodeString& insertion,
00545                                UErrorCode& status) const;
00546 
00563     virtual void transliterate(Replaceable& text, Position& index,
00564                                UChar insertion,
00565                                UErrorCode& status) const;
00566     
00579     virtual void transliterate(Replaceable& text, Position& index,
00580                                UErrorCode& status) const;
00581 
00593     virtual void finishTransliteration(Replaceable& text,
00594                                        Position& index) const;
00595 
00596 private:
00597     // NOTE(review): presumably the shared worker behind the Position-based overloads above; 'insertion' is passed by pointer here - confirm null handling in the .cpp.
00605     void _transliterate(Replaceable& text,
00606                         Position& index,
00607                         const UnicodeString* insertion,
00608                         UErrorCode &status) const;
00609 
00610 protected:
00611     // Pure virtual: every concrete subclass must provide an implementation.
00635     virtual void handleTransliterate(Replaceable& text,
00636                                      Position& index,
00637                                      UBool incremental) const = 0;
00638 
00639     // C++ requires this friend declaration so CompoundTransliterator
00640     // can access handleTransliterate.  Alternatively, we could
00641     // make handleTransliterate public.
00642     friend class CompoundTransliterator;
00643 
00644 public:
00645 
00659     int32_t getMaximumContextLength(void) const; // non-virtual; defined inline after the class
00660 
00661 protected:
00662 
00667     void setMaximumContextLength(int32_t maxContextLength);
00668 
00669 public:
00670 
00680     virtual const UnicodeString& getID(void) const;
00681 
00688     static UnicodeString& getDisplayName(const UnicodeString& ID,
00689                                          UnicodeString& result);
00690 
00710     static UnicodeString& getDisplayName(const UnicodeString& ID,
00711                                          const Locale& inLocale,
00712                                          UnicodeString& result);
00713 
00719     virtual const UnicodeFilter* getFilter(void) const;
00720 
00727     UnicodeFilter* orphanFilter(void); // NOTE(review): name suggests the caller takes ownership of the returned filter - confirm
00728 
00738     virtual void adoptFilter(UnicodeFilter* adoptedFilter); // NOTE(review): name suggests this object takes ownership - confirm
00739 
00761     Transliterator* createInverse(void) const;
00762 
00776     static Transliterator* createInstance(const UnicodeString& ID,
00777                                           Direction dir = FORWARD,
00778                                           ParseError* parseError = 0); // parseError is optional (defaults to 0)
00779 
00780 private:
00781 
00788     static char* DATA_DIR;
00789     
00794     static const char* RESOURCE_SUB_DIR;
00795 
00801     static const char* getDataDirectory(void);
00802 
00807     static Transliterator* _createInstance(const UnicodeString& ID,
00808                                            ParseError* parseError = 0);
00809 
00810 public:
00811 
00829     static void registerInstance(Transliterator* adoptedObj,
00830                                  UErrorCode& status);
00831 
00832 private:
00833 
00838     static void _registerInstance(Transliterator* adoptedPrototype,
00839                                   UErrorCode &status);
00840 
00841 public:
00842 
00854     static void unregister(const UnicodeString& ID);
00855 
00856 private:
00857 
00863     static void _unregister(const UnicodeString& ID);
00864 
00876     // virtual Enumeration getAvailableIDs();
00877 
00881     static UVector cacheIDs; // UVector is only forward-declared in this header
00882 
00883 public:
00884 
00891     static int32_t countAvailableIDs(void);
00892 
00899     static const UnicodeString& getAvailableID(int32_t index); // NOTE(review): valid index range is presumably [0, countAvailableIDs()) - confirm
00900 
00901 protected:
00902 
00907     UChar filteredCharAt(const Replaceable& text, int32_t i) const;
00908 
00913     void setID(const UnicodeString& id); // defined inline after the class; assigns the private ID member
00914 
00915 private:
00920     static UBool compareIDs(void* a, void* b);
00921 
00922     static void initializeCache(void);
00923 
00924     /* IDs take the form <source> ID_SEP <target>, where
00925      * <source> and <target> are (usually) script names.
00926      * Compound IDs take the form <ID> ( ID_DELIM <ID> )+.
00927      */
00928     static const UChar ID_SEP;   // ((UChar)0x002D) /*-*/
00929     static const UChar ID_DELIM; // ((UChar)0x003B) /*;*/
00930 };
00931 
00932 inline int32_t Transliterator::getMaximumContextLength(void) const { // read-only accessor
00933     return this->maximumContextLength; // hands back the stored private member unchanged
00934 }
00935 
00936 inline void Transliterator::setID(const UnicodeString& newID) { // replaces this object's identifier
00937     this->ID = newID; // plain UnicodeString assignment into the private ID member
00938 }
00939 
00940 inline Transliterator::Position::Position(int32_t startPos, int32_t limitPos) // two-offset form
00941     : start(startPos), limit(limitPos), cursor(startPos), end(limitPos) {} // cursor begins at start; end mirrors limit
00942 
00943 inline Transliterator::Position::Position(int32_t startPos, int32_t limitPos,
00944                                           int32_t cursorPos) // three-offset form: explicit cursor
00945     : start(startPos), limit(limitPos), cursor(cursorPos), end(limitPos) {} // end mirrors limit
00946 
00947 inline Transliterator::Position::Position(int32_t startPos, int32_t limitPos,
00948                                           int32_t cursorPos, int32_t endPos) // four-offset form
00949     : start(startPos), limit(limitPos), cursor(cursorPos), end(endPos) {} // every field taken verbatim from its argument
00950 
00951 #endif

Generated at Mon Jun 5 12:53:05 2000 for ICU1.5 by doxygen 1.0.0 written by Dimitri van Heesch, © 1997-1999