00001
00002
00003
00004
00005
00006
00007
00008
00009
00010 #ifndef UNICODESET_H
00011 #define UNICODESET_H
00012
00013 #include "unicode/unifilt.h"
00014 #include "unicode/utypes.h"
00015 #include "unicode/unistr.h"
00016
00017 class ParsePosition;
00018 class SymbolTable;
00019 class TransliterationRuleParser;
00020 class TransliterationRule;
00021
// UnicodeSet: a UnicodeFilter subclass that declares set-style operations
// (contains / add / retain / remove / complement) over UChar32 values and
// ranges, plus construction from a pattern string or a category code.
// Only declarations appear here; no member function is defined inside the
// class body, so notes about behavior below are inferred from names and
// signatures and are marked as assumptions where they go beyond that.
// NOTE(review): the five-digit number at the start of each line is a listing
// artifact; gaps in the numbering presumably mark stripped comment lines --
// confirm against the original header.
00249 class U_I18N_API UnicodeSet : public UnicodeFilter {
00250
// Data members (private, since they precede any access specifier in a
// `class`). Presumably `list` is the primary UChar32 array with `len` used
// entries and `capacity` allocated entries, and `buffer`/`bufferCapacity`
// describe a second scratch array (see swapBuffers()) -- TODO confirm in
// the implementation file.
00251 int32_t len;
00252 int32_t capacity;
00253 int32_t bufferCapacity;
00254 UChar32* list;
00255 UChar32* buffer;
00256
// The HPUX-guarded region is empty in this listing; whatever it contained
// originally is not visible here.
00257 #ifndef HPUX
00258
00259 #endif
00260
00261
// Static constants declared without initializers; their values are defined
// elsewhere. Presumably these are sizing increments used when allocating or
// growing the arrays above -- TODO confirm.
00262 static const int32_t START_EXTRA;
00263 static const int32_t GROW_EXTRA;
00264
// Presumably a string of category names consulted by applyCategory() --
// verify against the definition.
00265 static const UnicodeString CATEGORY_NAMES;
00266
// Non-const static pointer to UnicodeSet. Presumably a cache of per-category
// sets backing getCategorySet() -- TODO confirm ownership and lifetime.
00272 static UnicodeSet* CATEGORY_CACHE;
00273
// Presumably the delimiter text that ends a category reference inside a
// pattern -- verify against the parser.
00278 static const UnicodeString CATEGORY_CLOSE;
00279
00280
// Single-character constants; by their names these look like the special
// characters recognized in set patterns. Values are defined elsewhere.
00281 static const UChar SET_OPEN;
00282 static const UChar SET_CLOSE;
00283 static const UChar HYPHEN;
00284 static const UChar COMPLEMENT;
00285 static const UChar COLON;
00286 static const UChar BACKSLASH;
00287 static const UChar INTERSECTION;
00288 static const UChar UPPER_U;
00289
00290 public:
00291
// Public UChar32 bounds. Presumably the smallest and largest values a set
// may contain -- the actual values are not visible in this header.
00295 static const UChar32 MIN_VALUE;
00296
00300 static const UChar32 MAX_VALUE;
00301
00302
00303
00304
00305
// Second `public:` label; redundant with the one above but harmless.
00306 public:
00307
// Default constructor.
00312 UnicodeSet();
00313
// Constructs from a pair of UChar32 values named start and end; presumably
// the resulting set holds that range -- TODO confirm inclusivity.
00321 UnicodeSet(UChar32 start, UChar32 end);
00322
// Constructs from a pattern string. Outcome is reported through `status`
// (UErrorCode in/out parameter).
00331 UnicodeSet(const UnicodeString& pattern,
00332 UErrorCode& status);
00333
// Constructs from an int8_t category code, reporting through `status`.
// NOTE(review): the valid range of `category` is not visible here.
00342 UnicodeSet(int8_t category, UErrorCode& status);
00343
// Copy constructor.
00348 UnicodeSet(const UnicodeSet& o);
00349
// Virtual destructor.
00354 virtual ~UnicodeSet();
00355
// Copy assignment; returns a reference to UnicodeSet.
00360 UnicodeSet& operator=(const UnicodeSet& o);
00361
// Virtual equality comparison against another UnicodeSet.
00373 virtual UBool operator==(const UnicodeSet& o) const;
00374
// Non-virtual inequality; defined inline after the class as the negation of
// operator==.
00380 UBool operator!=(const UnicodeSet& o) const;
00381
// Virtual clone. Note the declared return type is UnicodeFilter*, not
// UnicodeSet*. Presumably the caller owns the result -- TODO confirm.
00388 virtual UnicodeFilter* clone() const;
00389
// Virtual hash code accessor.
00397 virtual int32_t hashCode(void) const;
00398
00399
00400
00401
00402
// Non-virtual. Presumably replaces the set's contents with the range
// start..end -- TODO confirm.
00411 void set(UChar32 start, UChar32 end);
00412
// Public, virtual pattern entry point; a private overload taking a
// ParsePosition and SymbolTable is declared further down.
00422 virtual void applyPattern(const UnicodeString& pattern,
00423 UErrorCode& status);
00424
// Writes into the caller-supplied `result` and returns a UnicodeString
// reference (presumably `result` itself -- confirm).
00431 virtual UnicodeString& toPattern(UnicodeString& result) const;
00432
// Presumably the number of values contained -- TODO confirm units.
00440 virtual int32_t size(void) const;
00441
00448 virtual UBool isEmpty(void) const;
00449
// Three contains() overloads: a UChar32 range, a single UChar32, and a
// single UChar. NOTE(review): the UChar32 and UChar single-value overloads
// can make calls with other integer argument types ambiguous.
00458 virtual UBool contains(UChar32 start, UChar32 end) const;
00459
00466 virtual UBool contains(UChar32 c) const;
00467
00475 virtual UBool contains(UChar c) const;
00476
// Mutators come in pairs: a virtual range form (start, end) and a
// non-virtual single-value form (c). Presumably the single-value form
// forwards to the range form -- verify in the implementation.
00490 virtual void add(UChar32 start, UChar32 end);
00491
00498 void add(UChar32 c);
00499
00511 virtual void retain(UChar32 start, UChar32 end);
00512
00513
00517 void retain(UChar32 c);
00518
00531 virtual void remove(UChar32 start, UChar32 end);
00532
00539 void remove(UChar32 c);
00540
// No-argument complement; by name, presumably inverts the whole set.
00547 virtual void complement(void);
00548
00561 virtual void complement(UChar32 start, UChar32 end);
00562
00563
00569 void complement(UChar32 c);
00570
// Whole-set operations taking another UnicodeSet by const reference.
// containsAll is a const query; the other four return void and are
// non-const, so they presumably modify this set in place.
00580 virtual UBool containsAll(const UnicodeSet& c) const;
00581
00593 virtual void addAll(const UnicodeSet& c);
00594
00605 virtual void retainAll(const UnicodeSet& c);
00606
00617 virtual void removeAll(const UnicodeSet& c);
00618
00627 virtual void complementAll(const UnicodeSet& c);
00628
00634 virtual void clear(void);
00635
// Range iteration by index. NOTE(review): presumably valid indices are
// 0 .. getRangeCount()-1; behavior for out-of-range indices is not visible.
00642 virtual int32_t getRangeCount(void) const;
00643
00650 virtual UChar32 getRangeStart(int32_t index) const;
00651
00658 virtual UChar32 getRangeEnd(int32_t index) const;
00659
// By name, presumably releases unused storage -- TODO confirm.
00664 virtual void compact();
00665
00666 private:
00667
00668
00669
00670
00671
// Friend classes: these gain access to the private members declared in this
// class, including the private constructor and containsIndexValue() below.
00672 friend class TransliterationRuleParser;
00673 friend class TransliterationRule;
00674
// Private constructor taking a pattern, a parse position and a symbol table
// by const reference. Note the private applyPattern() overload below takes
// the symbol table by const pointer instead.
00693 UnicodeSet(const UnicodeString& pattern, ParsePosition& pos,
00694 const SymbolTable& symbols,
00695 UErrorCode& status);
00696
// NOTE(review): the meaning of the uint8_t "index value" is not visible in
// this header -- see the implementation and the friend classes.
00702 UBool containsIndexValue(uint8_t v) const;
00703
// Second `private:` label; redundant with the one above but harmless.
00704 private:
00705
00706
00707
00708
00709
// Private pattern overload; `pos` is a non-const reference, so it is
// presumably advanced as the pattern is consumed. `symbols` is a pointer and
// so may presumably be null -- TODO confirm.
00734 void applyPattern(const UnicodeString& pattern,
00735 ParsePosition& pos,
00736 const SymbolTable* symbols,
00737 UErrorCode& status);
00738
00739
00740
00741
00742
// Applies a category identified by name, reporting through `status`.
00757 void applyCategory(const UnicodeString& catName,
00758 UErrorCode& status);
00759
// Static lookup returning a const reference to a UnicodeSet for a category
// code; presumably served from CATEGORY_CACHE -- TODO confirm.
00765 static const UnicodeSet& getCategorySet(int8_t cat);
00766
00767
00768
00769
00770
// Static helper. Presumably returns the character following index i in str;
// the result at end-of-string is not visible here.
00775 static UChar charAfter(const UnicodeString& str, int32_t i);
00776
// Storage helpers; presumably grow `list` / `buffer` to hold at least newLen
// entries and exchange the two arrays, respectively -- TODO confirm.
00777 void ensureCapacity(int32_t newLen);
00778
00779 void ensureBufferCapacity(int32_t newLen);
00780
00781 void swapBuffers(void);
00782
// 16-element UChar table, defined elsewhere; presumably hexadecimal digits.
00783 static const UChar HEX[16];
00784
// Static helper that writes into `buf` for a single UChar32; presumably
// appends c in pattern syntax on behalf of toPattern() -- verify.
00785 static void _toPat(UnicodeString& buf, UChar32 c);
00786
00787
00788
00789
00790
// Low-level combinators taking a raw UChar32 array, its length, and an
// int8_t polarity flag. NOTE(review): the encoding of `polarity` is not
// visible in this header.
00791 void exclusiveOr(const UChar32* other, int32_t otherLen, int8_t polarity);
00792
00793 void add(const UChar32* other, int32_t otherLen, int8_t polarity);
00794
00795 void retain(const UChar32* other, int32_t otherLen, int8_t polarity);
00796 };
00797
// Inequality: the logical negation of this object's operator== applied to o.
// The call goes through `this`, so it dispatches to the (virtual) member
// operator== exactly as the implicit member call would.
00798 inline UBool UnicodeSet::operator!=(const UnicodeSet& o) const {
00799 return !(this->operator==(o));
00800 }
00801
00802 #endif