00001 /* 00002 * Copyright © {1999}, International Business Machines Corporation and others. All Rights Reserved. 00003 ********************************************************************** 00004 * Date Name Description 00005 * 11/17/99 aliu Creation. 00006 ********************************************************************** 00007 */ 00008 #ifndef TRANSLIT_H 00009 #define TRANSLIT_H 00010 00011 #include "unicode/unistr.h" 00012 #include "unicode/parseerr.h" 00013 00014 class Replaceable; 00015 class UnicodeFilter; 00016 class TransliterationRuleData; 00017 class Hashtable; 00018 class U_I18N_API UVector; 00019 class CompoundTransliterator; 00020 00224 class U_I18N_API Transliterator { 00225 00226 public: 00227 00235 enum Direction { 00236 FORWARD, 00237 REVERSE 00238 }; 00242 class Position { 00243 public: 00247 int32_t start; 00248 00252 int32_t limit; 00253 00258 int32_t cursor; 00259 00264 int32_t end; 00265 00270 Position(int32_t start, int32_t limit); 00271 00276 Position(int32_t start, int32_t limit, int32_t cursor); 00277 00281 Position(int32_t start, int32_t limit, 00282 int32_t cursor, int32_t end); 00283 }; 00284 00285 private: 00286 00290 UnicodeString ID; 00291 00298 UnicodeFilter* filter; 00299 00300 int32_t maximumContextLength; 00301 00324 static Hashtable* cache; 00325 00329 static UMTX cacheMutex; 00330 00340 static UBool cacheInitialized; 00341 00355 struct CacheEntry { 00356 enum Type { 00357 RULE_BASED_PLACEHOLDER, 00358 REVERSE_RULE_BASED_PLACEHOLDER, 00359 PROTOTYPE, 00360 RBT_DATA, 00361 NONE // Only used for uninitialized entries 00362 } entryType; 00363 UnicodeString rbFile; // For *PLACEHOLDER 00364 union { 00365 Transliterator* prototype; // For PROTOTYPE 00366 TransliterationRuleData* data; // For RBT_DATA 00367 } u; 00368 CacheEntry(); 00369 ~CacheEntry(); 00370 void adoptPrototype(Transliterator* adopted); 00371 }; 00372 00378 static const char* RB_DISPLAY_NAME_PREFIX; 00379 00385 static const char* RB_SCRIPT_DISPLAY_NAME_PREFIX; 00386 00393 static const char* RB_DISPLAY_NAME_PATTERN; 00394 00401 static const char* RB_RULE_BASED_IDS; 00402 00406 static const char* RB_RULE; 00407 00408 protected: 00409 00418 Transliterator(const UnicodeString& ID, UnicodeFilter* adoptedFilter); 00419 00423 Transliterator(const Transliterator&); 00424 00428 Transliterator& operator=(const Transliterator&); 00429 00430 public: 00431 00436 virtual ~Transliterator(); 00437 00451 virtual Transliterator* clone() const { return 0; } 00452 00471 virtual int32_t transliterate(Replaceable& text, 00472 int32_t start, int32_t limit) const; 00473 00479 virtual void transliterate(Replaceable& text) const; 00480 00543 virtual void transliterate(Replaceable& text, Position& index, 00544 const UnicodeString& insertion, 00545 UErrorCode& status) const; 00546 00563 virtual void transliterate(Replaceable& text, Position& index, 00564 UChar insertion, 00565 UErrorCode& status) const; 00566 00579 virtual void transliterate(Replaceable& text, Position& index, 00580 UErrorCode& status) const; 00581 00593 virtual void finishTransliteration(Replaceable& text, 00594 Position& index) const; 00595 00596 private: 00597 00605 void _transliterate(Replaceable& text, 00606 Position& index, 00607 const UnicodeString* insertion, 00608 UErrorCode &status) const; 00609 00610 protected: 00611 00635 virtual void handleTransliterate(Replaceable& text, 00636 Position& index, 00637 UBool incremental) const = 0; 00638 00639 // C++ requires this friend declaration so CompoundTransliterator 00640 // can access handleTransliterate. Alternatively, we could 00641 // make handleTransliterate public. 00642 friend class CompoundTransliterator; 00643 00644 public: 00645 00659 int32_t getMaximumContextLength(void) const; 00660 00661 protected: 00662 00667 void setMaximumContextLength(int32_t maxContextLength); 00668 00669 public: 00670 00680 virtual const UnicodeString& getID(void) const; 00681 00688 static UnicodeString& getDisplayName(const UnicodeString& ID, 00689 UnicodeString& result); 00690 00710 static UnicodeString& getDisplayName(const UnicodeString& ID, 00711 const Locale& inLocale, 00712 UnicodeString& result); 00713 00719 virtual const UnicodeFilter* getFilter(void) const; 00720 00727 UnicodeFilter* orphanFilter(void); 00728 00738 virtual void adoptFilter(UnicodeFilter* adoptedFilter); 00739 00761 Transliterator* createInverse(void) const; 00762 00776 static Transliterator* createInstance(const UnicodeString& ID, 00777 Direction dir = FORWARD, 00778 ParseError* parseError = 0); 00779 00780 private: 00781 00788 static char* DATA_DIR; 00789 00794 static const char* RESOURCE_SUB_DIR; 00795 00801 static const char* getDataDirectory(void); 00802 00807 static Transliterator* _createInstance(const UnicodeString& ID, 00808 ParseError* parseError = 0); 00809 00810 public: 00811 00829 static void registerInstance(Transliterator* adoptedObj, 00830 UErrorCode& status); 00831 00832 private: 00833 00838 static void _registerInstance(Transliterator* adoptedPrototype, 00839 UErrorCode &status); 00840 00841 public: 00842 00854 static void unregister(const UnicodeString& ID); 00855 00856 private: 00857 00863 static void _unregister(const UnicodeString& ID); 00864 00876 // virtual Enumeration getAvailableIDs(); 00877 00881 static UVector cacheIDs; 00882 00883 public: 00884 00891 static int32_t countAvailableIDs(void); 00892 00899 static const UnicodeString& getAvailableID(int32_t index); 00900 00901 protected: 00902 00907 UChar filteredCharAt(const Replaceable& text, int32_t i) const; 00908 00913 void setID(const UnicodeString& id); 00914 00915 private: 00920 static UBool compareIDs(void* a, void* b); 00921 00922 static void initializeCache(void); 00923 00924 /* IDs take the form <source> ID_SEP <target>, where 00925 * <source> and <target> are (usually) script names. 00926 * Compound IDs take the form <ID> ( ID_DELIM <ID> )+. 00927 */ 00928 static const UChar ID_SEP; // ((UChar)0x002D) /*-*/ 00929 static const UChar ID_DELIM; // ((UChar)0x003B) /*;*/ 00930 }; 00931 00932 inline int32_t Transliterator::getMaximumContextLength(void) const { 00933 return maximumContextLength; 00934 } 00935 00936 inline void Transliterator::setID(const UnicodeString& id) { 00937 ID = id; 00938 } 00939 00940 inline Transliterator::Position::Position(int32_t aStart, int32_t aLimit) : 00941 start(aStart), limit(aLimit), cursor(aStart), end(aLimit) {} 00942 00943 inline Transliterator::Position::Position(int32_t aStart, int32_t aLimit, 00944 int32_t aCursor) : 00945 start(aStart), limit(aLimit), cursor(aCursor), end(aLimit) {} 00946 00947 inline Transliterator::Position::Position(int32_t aStart, int32_t aLimit, 00948 int32_t aCursor, int32_t anEnd) : 00949 start(aStart), limit(aLimit), cursor(aCursor), end(anEnd) {} 00950 00951 #endif