00001 /* 00002 ********************************************************************** 00003 * Copyright © {1999}, International Business Machines Corporation and others. All Rights Reserved. 00004 ********************************************************************** 00005 * Date Name Description 00006 * 10/20/99 alan Creation. 00007 ********************************************************************** 00008 */ 00009 00010 #ifndef UNICODESET_H 00011 #define UNICODESET_H 00012 00013 #include "unicode/unifilt.h" 00014 #include "unicode/utypes.h" 00015 #include "unicode/unistr.h" 00016 00017 class ParsePosition; 00018 class SymbolTable; 00019 class TransliterationRuleParser; 00020 class TransliterationRule; 00021 00249 class U_I18N_API UnicodeSet : public UnicodeFilter { 00250 00251 int32_t len; // length of list used; 0 <= len <= capacity 00252 int32_t capacity; // capacity of list 00253 int32_t bufferCapacity; // capacity of buffer 00254 UChar32* list; // MUST be terminated with HIGH 00255 UChar32* buffer; // internal buffer, may be NULL 00256 00257 #ifndef HPUX 00258 static const UChar32 HIGH; // HIGH > all valid values. 110000 for codepoints 00259 #endif 00260 static const UChar32 LOW; // LOW <= all valid values. ZERO for codepoints 00261 00262 static const int32_t START_EXTRA; // initial storage. Must be >= 0 00263 static const int32_t GROW_EXTRA; // extra amount for growth. Must be >= 0 00264 00265 static const UnicodeString CATEGORY_NAMES; 00266 00272 static UnicodeSet* CATEGORY_CACHE; 00273 00278 static const UnicodeString CATEGORY_CLOSE; 00279 00280 // More special characters... 00281 static const UChar SET_OPEN; 00282 static const UChar SET_CLOSE; 00283 static const UChar HYPHEN; 00284 static const UChar COMPLEMENT; 00285 static const UChar COLON; 00286 static const UChar BACKSLASH; 00287 static const UChar INTERSECTION; 00288 static const UChar UPPER_U; 00289 00290 public: 00291 00295 static const UChar32 MIN_VALUE; 00296 00300 static const UChar32 MAX_VALUE; 00301 00302 //---------------------------------------------------------------- 00303 // Constructors &c 00304 //---------------------------------------------------------------- 00305 00306 public: 00307 00312 UnicodeSet(); 00313 00321 UnicodeSet(UChar32 start, UChar32 end); 00322 00331 UnicodeSet(const UnicodeString& pattern, 00332 UErrorCode& status); 00333 00342 UnicodeSet(int8_t category, UErrorCode& status); 00343 00348 UnicodeSet(const UnicodeSet& o); 00349 00354 virtual ~UnicodeSet(); 00355 00360 UnicodeSet& operator=(const UnicodeSet& o); 00361 00373 virtual UBool operator==(const UnicodeSet& o) const; 00374 00380 UBool operator!=(const UnicodeSet& o) const; 00381 00388 virtual UnicodeFilter* clone() const; 00389 00397 virtual int32_t hashCode(void) const; 00398 00399 //---------------------------------------------------------------- 00400 // Public API 00401 //---------------------------------------------------------------- 00402 00411 void set(UChar32 start, UChar32 end); 00412 00422 virtual void applyPattern(const UnicodeString& pattern, 00423 UErrorCode& status); 00424 00431 virtual UnicodeString& toPattern(UnicodeString& result) const; 00432 00440 virtual int32_t size(void) const; 00441 00448 virtual UBool isEmpty(void) const; 00449 00458 virtual UBool contains(UChar32 start, UChar32 end) const; 00459 00466 virtual UBool contains(UChar32 c) const; 00467 00475 virtual UBool contains(UChar c) const; 00476 00490 virtual void add(UChar32 start, UChar32 end); 00491 00498 void add(UChar32 c); 00499 00511 virtual void retain(UChar32 start, UChar32 end); 00512 00513 00517 void retain(UChar32 c); 00518 00531 virtual void remove(UChar32 start, UChar32 end); 00532 00539 void remove(UChar32 c); 00540 00547 virtual void complement(void); 00548 00561 virtual void complement(UChar32 start, UChar32 end); 00562 00563 00569 void complement(UChar32 c); 00570 00580 virtual UBool containsAll(const UnicodeSet& c) const; 00581 00593 virtual void addAll(const UnicodeSet& c); 00594 00605 virtual void retainAll(const UnicodeSet& c); 00606 00617 virtual void removeAll(const UnicodeSet& c); 00618 00627 virtual void complementAll(const UnicodeSet& c); 00628 00634 virtual void clear(void); 00635 00642 virtual int32_t getRangeCount(void) const; 00643 00650 virtual UChar32 getRangeStart(int32_t index) const; 00651 00658 virtual UChar32 getRangeEnd(int32_t index) const; 00659 00664 virtual void compact(); 00665 00666 private: 00667 00668 //---------------------------------------------------------------- 00669 // RuleBasedTransliterator support 00670 //---------------------------------------------------------------- 00671 00672 friend class TransliterationRuleParser; 00673 friend class TransliterationRule; 00674 00693 UnicodeSet(const UnicodeString& pattern, ParsePosition& pos, 00694 const SymbolTable& symbols, 00695 UErrorCode& status); 00696 00702 UBool containsIndexValue(uint8_t v) const; 00703 00704 private: 00705 00706 //---------------------------------------------------------------- 00707 // Implementation: Pattern parsing 00708 //---------------------------------------------------------------- 00709 00734 void applyPattern(const UnicodeString& pattern, 00735 ParsePosition& pos, 00736 const SymbolTable* symbols, 00737 UErrorCode& status); 00738 00739 //---------------------------------------------------------------- 00740 // Implementation: Generation of pairs for Unicode categories 00741 //---------------------------------------------------------------- 00742 00757 void applyCategory(const UnicodeString& catName, 00758 UErrorCode& status); 00759 00765 static const UnicodeSet& getCategorySet(int8_t cat); 00766 00767 //---------------------------------------------------------------- 00768 // Implementation: Utility methods 00769 //---------------------------------------------------------------- 00770 00775 static UChar charAfter(const UnicodeString& str, int32_t i); 00776 00777 void ensureCapacity(int32_t newLen); 00778 00779 void ensureBufferCapacity(int32_t newLen); 00780 00781 void swapBuffers(void); 00782 00783 static const UChar HEX[16]; 00784 00785 static void _toPat(UnicodeString& buf, UChar32 c); 00786 00787 //---------------------------------------------------------------- 00788 // Implementation: Fundamental operators 00789 //---------------------------------------------------------------- 00790 00791 void exclusiveOr(const UChar32* other, int32_t otherLen, int8_t polarity); 00792 00793 void add(const UChar32* other, int32_t otherLen, int8_t polarity); 00794 00795 void retain(const UChar32* other, int32_t otherLen, int8_t polarity); 00796 }; 00797 00798 inline UBool UnicodeSet::operator!=(const UnicodeSet& o) const { 00799 return !operator==(o); 00800 } 00801 00802 #endif