00001
00002
00003
00004
00005
00006
00007
00008
00009
00010 #ifndef UNICODESET_H
00011 #define UNICODESET_H
00012
00013 #include "unicode/unifilt.h"
00014 #include "unicode/utypes.h"
00015 #include "unicode/unistr.h"
00016
00017 U_NAMESPACE_BEGIN
00018
// Forward declarations. In the visible part of this header these types are
// only named through pointers, references or friend declarations, so their
// full definitions are not required here.
// NOTE(review): TransliterationRuleParser is not referenced anywhere else in
// the visible part of this header; it may be a leftover - confirm before
// removing it.
00019 class ParsePosition;
00020 class SymbolTable;
00021 class TransliterationRuleParser;
00022 class TransliterationRule;
00023 class Transliterator;
00024 class TransliteratorParser;
00025 class UVector;
00026
/**
 * A mutable set of Unicode code points (UChar32) that can additionally hold
 * strings (see the UnicodeString overloads of add(), remove(), contains()
 * and the private "strings" member).
 *
 * The class derives publicly from UnicodeFilter and is exported from the
 * library through U_COMMON_API. It declares its own copy constructor,
 * assignment operator and virtual destructor, and it manages raw pointer
 * members, so copying is expected to be handled by those members rather than
 * by compiler-generated defaults (implementations are not in this header).
 *
 * Only operator!=, the three containsSome() overloads, getStaticClassID() and
 * getDynamicClassID() are defined in this header; every other member is
 * defined elsewhere, so notes on their behaviour below are marked as
 * assumptions where the header alone cannot establish them.
 */
00219 class U_COMMON_API UnicodeSet : public UnicodeFilter {
00220
// Storage for the code point part of the set.
// NOTE(review): presumably "list" is an inversion list of range boundaries
// holding "len" entries with room for "capacity" entries, and "buffer"
// (with "bufferCapacity") is scratch space exchanged by swapBuffers() -
// confirm against the implementation file.
00221 int32_t len;
00222 int32_t capacity;
00223 int32_t bufferCapacity;
00224 UChar32* list;
00225 UChar32* buffer;
00226
// Container for the string members of the set. Held by pointer;
// allocateStrings() below suggests it is created on demand - TODO confirm.
00227 UVector* strings;
00228
// Pattern text associated with this set.
// NOTE(review): presumably the pattern last applied, kept so toPattern()
// can return it - confirm.
00238 UnicodeString pat;
00239
00240 public:
00241
/**
 * Smallest value a set member may have. Defined out of line;
 * presumably 0 - verify in the implementation.
 */
00245 static const UChar32 MIN_VALUE;
00246
/**
 * Largest value a set member may have. Defined out of line;
 * presumably 0x10FFFF - verify in the implementation.
 */
00250 static const UChar32 MAX_VALUE;
00251
00252
00253
00254
00255
// ---- Construction, destruction, copying, comparison ----
00256 public:
00257
/** Default constructor (presumably creates an empty set - confirm). */
00262 UnicodeSet();
00263
/**
 * Constructs a set from a code point range.
 * @param start first code point of the range
 * @param end   last code point of the range (presumably inclusive - confirm)
 */
00271 UnicodeSet(UChar32 start, UChar32 end);
00272
/**
 * Constructs a set from a pattern string.
 * @param pattern textual description of the set
 * @param status  in/out error code; check it after construction
 */
00281 UnicodeSet(const UnicodeString& pattern,
00282 UErrorCode& status);
00283
/**
 * Constructs a set from a category value.
 * @param category category selector; the valid values are not visible in
 *                 this header (presumably a Unicode general category)
 * @param status   in/out error code
 */
00290 UnicodeSet(int8_t category, UErrorCode& status);
00291
/** Copy constructor. */
00296 UnicodeSet(const UnicodeSet& o);
00297
/** Virtual destructor, so deletion through a base class pointer is safe. */
00302 virtual ~UnicodeSet();
00303
/** Copy assignment. */
00308 UnicodeSet& operator=(const UnicodeSet& o);
00309
/** Equality comparison with another UnicodeSet; virtual. */
00321 virtual UBool operator==(const UnicodeSet& o) const;
00322
/** Inequality; defined inline after the class as the negation of operator==. */
00328 UBool operator!=(const UnicodeSet& o) const;
00329
/**
 * Polymorphic copy.
 * @return a UnicodeFunctor pointer (presumably a heap allocated copy owned
 *         by the caller - confirm)
 */
00336 virtual UnicodeFunctor* clone() const;
00337
/** Hash code for this set. */
00345 virtual int32_t hashCode(void) const;
00346
00347
00348
00349
00350
// ---- Whole-set assignment and pattern conversion ----
// Most mutators below return UnicodeSet& (presumably *this, to allow call
// chaining - confirm in the implementation).

/**
 * Sets the contents from a code point range (presumably replacing the
 * previous contents - confirm).
 */
00359 UnicodeSet& set(UChar32 start, UChar32 end);
00360
/**
 * Static test on pattern text at a given offset.
 * @param pattern text to inspect
 * @param pos     offset into pattern
 * @return presumably TRUE when the text at pos looks like the start of a set
 *         pattern - confirm
 */
00365 static UBool resemblesPattern(const UnicodeString& pattern,
00366 int32_t pos);
00367
/**
 * Applies a pattern string to this set.
 * @param pattern textual description of the set
 * @param status  in/out error code
 */
00377 virtual UnicodeSet& applyPattern(const UnicodeString& pattern,
00378 UErrorCode& status);
00379
/**
 * Writes a pattern describing this set into result.
 * @param result            receives the pattern
 * @param escapeUnprintable defaults to FALSE; presumably requests escape
 *                          sequences for unprintable characters
 * @return a UnicodeString reference (presumably result)
 */
00392 virtual UnicodeString& toPattern(UnicodeString& result,
00393 UBool escapeUnprintable = FALSE) const;
00394
// ---- Queries ----

/** Number of elements in the set. */
00402 virtual int32_t size(void) const;
00403
/** Emptiness test. */
00410 virtual UBool isEmpty(void) const;
00411
/** Membership test for a single code point. */
00418 virtual UBool contains(UChar32 c) const;
00419
/** Membership test for a range (presumably every code point in start..end). */
00428 virtual UBool contains(UChar32 start, UChar32 end) const;
00429
/** Membership test for a string. */
00436 UBool contains(const UnicodeString& s) const;
00437
/** Superset test against another set. */
00444 virtual UBool containsAll(const UnicodeSet& c) const;
00445
/** Presumably TRUE when every code point of s is in this set - confirm. */
00452 UBool containsAll(const UnicodeString& s) const;
00453
/** Disjointness test against a range. */
00461 UBool containsNone(UChar32 start, UChar32 end) const;
00462
/** Disjointness test against another set. */
00469 UBool containsNone(const UnicodeSet& c) const;
00470
/** Disjointness test against the contents of a string. */
00477 UBool containsNone(const UnicodeString& s) const;
00478
// The three containsSome() overloads are defined inline after the class as
// the negation of the containsNone() overload with the same parameters.
00486 inline UBool containsSome(UChar32 start, UChar32 end) const;
00487
00494 inline UBool containsSome(const UnicodeSet& s) const;
00495
00502 inline UBool containsSome(const UnicodeString& s) const;
00503
/**
 * Matches this set against text.
 * Declared non-const, unlike the other query methods of this class.
 * NOTE(review): presumably implements the matcher interface inherited
 * through UnicodeFilter - confirm against the base class headers.
 * @param text        text to match against
 * @param offset      in/out position in text
 * @param limit       bound on the match position
 * @param incremental presumably allows a partial match result - confirm
 */
00507 UMatchDegree matches(const Replaceable& text,
00508 int32_t& offset,
00509 int32_t limit,
00510 UBool incremental);
00511
00512 private:
/**
 * Static helper comparing string s with text between start and limit.
 * The meaning of the returned int32_t is not visible in this header
 * (presumably a matched length - confirm).
 */
00534 static int32_t matchRest(const Replaceable& text,
00535 int32_t start, int32_t limit,
00536 const UnicodeString& s);
00537
/**
 * Looks up c in the internal storage and returns an int32_t result
 * (presumably an index into "list" - confirm).
 */
00547 int32_t findCodePoint(UChar32 c) const;
00548
00549 public:
00550
/**
 * Adds to toUnionTo; this set itself is not modified (const method).
 * Presumably adds every character this set can match.
 */
00557 void addMatchSetTo(UnicodeSet& toUnionTo) const;
00558
/** Maps a code point to an index (presumably -1 when absent - confirm). */
00566 int32_t indexOf(UChar32 c) const;
00567
/** Maps an index to a code point; presumably the inverse of indexOf(). */
00576 UChar32 charAt(int32_t index) const;
00577
// ---- Element-wise mutation ----

/** Adds a code point range to the set. */
00591 virtual UnicodeSet& add(UChar32 start, UChar32 end);
00592
/** Adds a single code point to the set. */
00599 UnicodeSet& add(UChar32 c);
00600
/** Adds a string to the set. */
00610 UnicodeSet& add(const UnicodeString& s);
00611
00612 private:
/**
 * Static helper returning an int32_t derived from s.
 * NOTE(review): presumably the single code point that s consists of, or a
 * negative sentinel when s is not exactly one code point - confirm.
 */
00618 static int32_t getSingleCP(const UnicodeString& s);
00619
/** Internal string insertion helper used by the implementation. */
00620 void _add(const UnicodeString& s);
00621
00622 public:
// String-driven bulk operations; presumably each treats s as a collection
// of code points - confirm.
00629 UnicodeSet& addAll(const UnicodeString& s);
00630
00637 UnicodeSet& retainAll(const UnicodeString& s);
00638
00645 UnicodeSet& complementAll(const UnicodeString& s);
00646
00653 UnicodeSet& removeAll(const UnicodeString& s);
00654
/**
 * Static factory returning a raw UnicodeSet pointer built from s.
 * NOTE(review): presumably heap allocated and owned by the caller - confirm.
 */
00662 static UnicodeSet* createFrom(const UnicodeString& s);
00663
00664
/**
 * Static factory returning a raw UnicodeSet pointer built from s.
 * NOTE(review): presumably built from the individual code points of s, and
 * owned by the caller - confirm.
 */
00671 static UnicodeSet* createFromAll(const UnicodeString& s);
00672
/** Retain operation on a range (presumably intersects the set with it). */
00685 virtual UnicodeSet& retain(UChar32 start, UChar32 end);
00686
00687
/** Retain operation on a single code point. */
00692 UnicodeSet& retain(UChar32 c);
00693
/** Removes a code point range from the set. */
00706 virtual UnicodeSet& remove(UChar32 start, UChar32 end);
00707
/** Removes a single code point from the set. */
00714 UnicodeSet& remove(UChar32 c);
00715
/** Removes a string from the set. */
00723 UnicodeSet& remove(const UnicodeString& s);
00724
/** Complements the whole set. */
00731 virtual UnicodeSet& complement(void);
00732
/** Complements membership over a code point range. */
00746 virtual UnicodeSet& complement(UChar32 start, UChar32 end);
00747
/** Complements membership of a single code point. */
00754 UnicodeSet& complement(UChar32 c);
00755
/** Complements membership of a string. */
00764 UnicodeSet& complement(const UnicodeString& s);
00765
// ---- Set-to-set operations (presumably union, intersection, difference
// and symmetric difference, in that order - confirm) ----
00777 virtual UnicodeSet& addAll(const UnicodeSet& c);
00778
00789 virtual UnicodeSet& retainAll(const UnicodeSet& c);
00790
00801 virtual UnicodeSet& removeAll(const UnicodeSet& c);
00802
00811 virtual UnicodeSet& complementAll(const UnicodeSet& c);
00812
/** Clears the set. */
00818 virtual UnicodeSet& clear(void);
00819
// ---- Range iteration ----

/** Number of ranges in the set. */
00826 virtual int32_t getRangeCount(void) const;
00827
/** First code point of the range at the given index. */
00834 virtual UChar32 getRangeStart(int32_t index) const;
00835
/** Last code point of the range at the given index. */
00842 virtual UChar32 getRangeEnd(int32_t index) const;
00843
/**
 * Serializes the set into an array of 16-bit units.
 * @param dest         output array
 * @param destCapacity size of dest, presumably in uint16_t units
 * @param ec           in/out error code
 * @return an int32_t (presumably the number of units written or required)
 */
00891 int32_t serialize(uint16_t *dest, int32_t destCapacity, UErrorCode& ec) const;
00892
/** Compacts the set (presumably releases unused storage - confirm). */
00897 virtual UnicodeSet& compact();
00898
/**
 * Class identifier shared by all UnicodeSet objects: the address of the
 * private static member fgClassID, cast to UClassID.
 */
00910 static UClassID getStaticClassID(void) { return (UClassID)&fgClassID; }
00911
/**
 * Run-time class identifier; returns getStaticClassID().
 * NOTE(review): the ';' after the function body is redundant.
 */
00919 virtual UClassID getDynamicClassID(void) const { return getStaticClassID(); };
00920
00921 private:
00922
00923
00924
// USetAccess is a friend and can therefore reach the private string
// accessors declared below.
00925 friend class USetAccess;
00926
/** Number of strings held by the set. */
00927 int32_t getStringCount() const;
00928
/** String at the given index, returned as a pointer to const. */
00929 const UnicodeString* getString(int32_t index) const;
00930
00931 private:
00932
// Only the address of this object is used (see getStaticClassID()).
00933 static const char fgClassID;
00934
00935
00936
00937
00938
// These classes are granted access to the private members, including the
// ParsePosition-based constructors declared below.
00939 friend class Transliterator;
00940 friend class TransliteratorParser;
00941 friend class TransliteratorIDParser;
00942 friend class TransliterationRule;
00943
00944 friend class RBBIRuleScanner;
00945
/**
 * Private constructor from pattern text, a parse position and a symbol
 * table. Only members and friends can call it.
 * @param pattern text containing the set pattern
 * @param pos     in/out parse position within pattern
 * @param symbols symbol table consulted while parsing
 * @param status  in/out error code
 */
00964 UnicodeSet(const UnicodeString& pattern, ParsePosition& pos,
00965 const SymbolTable& symbols,
00966 UErrorCode& status);
00967
/** Same as the constructor above, but without a symbol table. */
00973 UnicodeSet(const UnicodeString& pattern, ParsePosition& pos,
00974 UErrorCode& status);
00975
/**
 * Index-value test taking an 8-bit value v.
 * NOTE(review): presumably overrides a hook of the inherited matcher
 * interface - confirm against the base class headers.
 */
00981 virtual UBool matchesIndexValue(uint8_t v) const;
00982
00983 private:
00984
00985
00986
00987
00988
/**
 * Private pattern entry point. Unlike the public applyPattern() it returns
 * void, takes an explicit parse position, and takes the symbol table as a
 * pointer (presumably so that it may be null - confirm).
 */
01013 void applyPattern(const UnicodeString& pattern,
01014 ParsePosition& pos,
01015 const SymbolTable* symbols,
01016 UErrorCode& status);
01017
01018
01019
01020
01021
// ---- Storage management helpers ----
// NOTE(review): presumably these grow "list" / "buffer" to hold newLen
// entries, exchange the two arrays, and create "strings" on demand (the
// UBool result presumably reports success) - confirm.
01022 void ensureCapacity(int32_t newLen);
01023
01024 void ensureBufferCapacity(int32_t newLen);
01025
01026 void swapBuffers(void);
01027
01028 UBool allocateStrings();
01029
// ---- Pattern parsing and generation helpers ----

/**
 * Internal pattern parser; rebuiltPat is an additional output parameter
 * (presumably receives a regenerated form of the parsed pattern - confirm).
 */
01030 void _applyPattern(const UnicodeString& pattern,
01031 ParsePosition& pos,
01032 const SymbolTable* symbols,
01033 UnicodeString& rebuiltPat,
01034 UErrorCode& status);
01035
/** Internal counterpart of toPattern() without a default argument. */
01036 UnicodeString& _toPattern(UnicodeString& result,
01037 UBool escapeUnprintable) const;
01038
/** Internal helper that writes a pattern for this set into result. */
01039 UnicodeString& _generatePattern(UnicodeString& result,
01040 UBool escapeUnprintable) const;
01041
// Static helpers that append a string or a single code point to the pattern
// buffer buf, taking the same escapeUnprintable flag as toPattern().
01042 static void _appendToPat(UnicodeString& buf, const UnicodeString& s, UBool escapeUnprintable);
01043
01044 static void _appendToPat(UnicodeString& buf, UChar32 c, UBool escapeUnprintable);
01045
01046
01047
01048
01049
// ---- Low-level operations on raw UChar32 arrays ----
// Each takes another array, its length and a polarity value.
// NOTE(review): the encoding of "polarity" is not visible in this header;
// presumably it selects whether either operand is treated as complemented.
01050 void exclusiveOr(const UChar32* other, int32_t otherLen, int8_t polarity);
01051
01052 void add(const UChar32* other, int32_t otherLen, int8_t polarity);
01053
01054 void retain(const UChar32* other, int32_t otherLen, int8_t polarity);
01055
// The iterator class needs direct access to the private storage.
01056 friend class UnicodeSetIterator;
01057 };
01058
/**
 * Inequality test: the logical negation of the virtual operator==(), so a
 * subclass that overrides operator==() gets a consistent operator!=().
 * @param o the set to compare against
 * @return a true value when operator==(o) reports the sets as different
 */
01059 inline UBool UnicodeSet::operator!=(const UnicodeSet& o) const {
01060 return !(this->operator==(o));
01061 }
01062
/**
 * Range overload of containsSome(): the logical negation of
 * containsNone(start, end).
 * @param start first code point of the range
 * @param end   last code point of the range
 * @return a true value unless containsNone(start, end) holds
 */
01063 inline UBool UnicodeSet::containsSome(UChar32 start, UChar32 end) const {
01064 return !(this->containsNone(start, end));
01065 }
01066
/**
 * Set overload of containsSome(): the logical negation of containsNone(s).
 * @param s the set to test against
 * @return a true value unless containsNone(s) holds
 */
01067 inline UBool UnicodeSet::containsSome(const UnicodeSet& s) const {
01068 return !(this->containsNone(s));
01069 }
01070
/**
 * String overload of containsSome(): the logical negation of
 * containsNone(s).
 * @param s the string to test against
 * @return a true value unless containsNone(s) holds
 */
01071 inline UBool UnicodeSet::containsSome(const UnicodeString& s) const {
01072 return !(this->containsNone(s));
01073 }
01074
01075 U_NAMESPACE_END
01076
01077 #endif