00001
00002
00003
00004
00005
00006
00007
00008
00009
00010 #ifndef UNICODESET_H
00011 #define UNICODESET_H
00012
00013 #include "unicode/unifilt.h"
00014 #include "unicode/utypes.h"
00015 #include "unicode/unistr.h"
00016 #include "unicode/uchar.h"
00017
00018 U_NAMESPACE_BEGIN
00019
// Forward declarations of types this header only names, without needing
// their full definitions:
//  - ParsePosition and SymbolTable appear as reference/pointer parameters of
//    private UnicodeSet constructors and pattern-parsing helpers.
//  - UVector is used for a pointer data member of UnicodeSet.
//  - TransliterationRule, Transliterator and TransliteratorParser are named
//    in friend declarations of UnicodeSet.
//  - TransliterationRuleParser is not referenced anywhere else in the part of
//    this header shown here. NOTE(review): possibly a leftover -- confirm
//    before removing.
00020 class ParsePosition;
00021 class SymbolTable;
00022 class TransliterationRuleParser;
00023 class TransliterationRule;
00024 class Transliterator;
00025 class TransliteratorParser;
00026 class UVector;
00027
// UnicodeSet: exported (U_COMMON_API) class publicly derived from
// UnicodeFilter. Its interface works on UChar32 code points, code point
// ranges (start, end), UnicodeString values and textual patterns.
//
// Only declarations are visible in this header. Statements below about what a
// member *does* are inferred from names and signatures and are marked
// NOTE(review) / "presumably" wherever the header itself does not establish
// them; confirm against the implementation file before relying on them.
00259 class U_COMMON_API UnicodeSet : public UnicodeFilter {
00260
// ---- Private storage (members before the first access specifier of a
// ---- `class` are private) ----
// NOTE(review): presumably `list` is the primary array of UChar32 data with
// `len` entries in use and `capacity` entries allocated, while `buffer` /
// `bufferCapacity` describe a second array exchanged with `list` by
// swapBuffers() -- confirm in the implementation.
00261 int32_t len;
00262 int32_t capacity;
00263 int32_t bufferCapacity;
00264 UChar32* list;
00265 UChar32* buffer;
00266
// Pointer to a UVector. NOTE(review): presumably holds the string members of
// the set exposed through getStringCount()/getString(), and is created by
// allocateStrings() -- confirm.
00267 UVector* strings;
00268
// NOTE(review): presumably the pattern text associated with this set
// (see applyPattern()/toPattern()) -- confirm.
00278 UnicodeString pat;
00279
00280 public:
00281
// Static UChar32 constants, defined out of line.
// NOTE(review): by name, the minimum and maximum code point values the set
// deals with -- confirm the actual values in the implementation.
00286 static const UChar32 MIN_VALUE;
00287
00292 static const UChar32 MAX_VALUE;
00293
00294
00295
00296
00297
00298 public:
00299
// ---- Construction, destruction, copying, comparison ----

// Default constructor. NOTE(review): presumably creates an empty set.
00304 UnicodeSet();
00305
// Constructs from a pair of code points.
// NOTE(review): presumably the inclusive range start..end -- confirm.
00314 UnicodeSet(UChar32 start, UChar32 end);
00315
// Constructs from a pattern string. `status` is a UErrorCode passed by
// non-const reference. NOTE(review): presumably set to a failure code when
// the pattern cannot be parsed -- confirm.
00324 UnicodeSet(const UnicodeString& pattern,
00325 UErrorCode& status);
00326
// Constructs from an 8-bit category code, with a UErrorCode in/out argument.
// NOTE(review): presumably a Unicode general category value -- confirm.
00333 UnicodeSet(int8_t category, UErrorCode& status);
00334
// Copy constructor.
00339 UnicodeSet(const UnicodeSet& o);
00340
// Virtual destructor.
00345 virtual ~UnicodeSet();
00346
// Copy assignment; returns a UnicodeSet reference.
00351 UnicodeSet& operator=(const UnicodeSet& o);
00352
// Virtual equality comparison against another UnicodeSet.
00364 virtual UBool operator==(const UnicodeSet& o) const;
00365
// Inequality. Non-virtual; its inline definition after the class returns the
// negation of operator==.
00371 UBool operator!=(const UnicodeSet& o) const;
00372
// Returns a UnicodeFunctor pointer.
// NOTE(review): presumably a newly allocated copy owned by the caller.
00379 virtual UnicodeFunctor* clone() const;
00380
// Returns a 32-bit hash value for this object.
00388 virtual int32_t hashCode(void) const;
00389
00390
00391
00392
00393
// ---- Whole-set assignment and pattern / property application ----

// NOTE(review): presumably replaces the contents with the range start..end
// and returns *this -- confirm.
00403 UnicodeSet& set(UChar32 start, UChar32 end);
00404
// Static check on `pattern` at index `pos`.
// NOTE(review): presumably reports whether a set pattern appears to begin
// at that position -- confirm.
00410 static UBool resemblesPattern(const UnicodeString& pattern,
00411 int32_t pos);
00412
// Applies a pattern string to this set, with a UErrorCode in/out argument.
00422 virtual UnicodeSet& applyPattern(const UnicodeString& pattern,
00423 UErrorCode& status);
00424
// Writes a pattern into `result` and returns a UnicodeString reference.
// `escapeUnprintable` defaults to FALSE.
// NOTE(review): presumably the returned reference is `result` -- confirm.
00437 virtual UnicodeString& toPattern(UnicodeString& result,
00438 UBool escapeUnprintable = FALSE) const;
00439
// Applies a (UProperty, integer value) pair to this set.
// NOTE(review): presumably makes the set contain exactly the code points
// having that property value -- confirm.
00461 UnicodeSet& applyIntPropertyValue(UProperty prop,
00462 int32_t value,
00463 UErrorCode& ec);
00464
// Same idea as applyIntPropertyValue but with the property and value given
// as strings. NOTE(review): presumably property/value names or aliases.
00492 UnicodeSet& applyPropertyAlias(const UnicodeString& prop,
00493 const UnicodeString& value,
00494 UErrorCode& ec);
00495
// ---- Queries ----

// NOTE(review): presumably the number of elements in the set.
00503 virtual int32_t size(void) const;
00504
// NOTE(review): presumably true when the set has no elements.
00511 virtual UBool isEmpty(void) const;
00512
// Membership tests for a single code point, a code point range, and a string.
00519 virtual UBool contains(UChar32 c) const;
00520
00529 virtual UBool contains(UChar32 start, UChar32 end) const;
00530
00538 UBool contains(const UnicodeString& s) const;
00539
// containsAll overloads, taking another set or a string.
// NOTE(review): presumably true when every element of the argument (for the
// string overload, presumably every code point in `s`) is in this set.
00547 virtual UBool containsAll(const UnicodeSet& c) const;
00548
00556 UBool containsAll(const UnicodeString& s) const;
00557
// containsNone overloads, taking a range, another set, or a string.
// NOTE(review): presumably true when this set shares nothing with the
// argument -- confirm.
00566 UBool containsNone(UChar32 start, UChar32 end) const;
00567
00575 UBool containsNone(const UnicodeSet& c) const;
00576
00584 UBool containsNone(const UnicodeString& s) const;
00585
// containsSome overloads. Each is defined inline after the class as the
// logical negation of the containsNone overload with the same parameters.
00594 inline UBool containsSome(UChar32 start, UChar32 end) const;
00595
00603 inline UBool containsSome(const UnicodeSet& s) const;
00604
00612 inline UBool containsSome(const UnicodeString& s) const;
00613
// Matching entry point: takes a Replaceable text, an offset passed by
// non-const reference, a limit and an `incremental` flag, and returns a
// UMatchDegree. Declared non-const.
// NOTE(review): presumably implements a matcher interface inherited through
// UnicodeFilter and may update `offset` -- confirm.
00618 UMatchDegree matches(const Replaceable& text,
00619 int32_t& offset,
00620 int32_t limit,
00621 UBool incremental);
00622
00623 private:
// Static helper over `text` between `start` and `limit` with a string `s`.
// NOTE(review): presumably a helper for matches() that compares the rest of
// `s` against the text and returns a length -- confirm.
00645 static int32_t matchRest(const Replaceable& text,
00646 int32_t start, int32_t limit,
00647 const UnicodeString& s);
00648
// NOTE(review): presumably locates the position in `list` associated with
// code point `c` -- confirm.
00658 int32_t findCodePoint(UChar32 c) const;
00659
00660 public:
00661
// Takes another set by non-const reference.
// NOTE(review): presumably adds to `toUnionTo` everything this set can
// match -- confirm.
00669 void addMatchSetTo(UnicodeSet& toUnionTo) const;
00670
// indexOf / charAt map between a code point and an integer index.
// NOTE(review): presumably inverse operations over the set's code points in
// order, with sentinel results for "not found" / "out of range" -- confirm.
00679 int32_t indexOf(UChar32 c) const;
00680
00690 UChar32 charAt(int32_t index) const;
00691
// ---- Element-level modification (each returns a UnicodeSet reference,
// ---- presumably *this, which would allow chaining -- confirm) ----

// add overloads: a range (virtual), a single code point, a string.
00705 virtual UnicodeSet& add(UChar32 start, UChar32 end);
00706
00713 UnicodeSet& add(UChar32 c);
00714
00725 UnicodeSet& add(const UnicodeString& s);
00726
00727 private:
// Static helper returning an int32_t for a string.
// NOTE(review): presumably the single code point `s` consists of, or a
// negative value if `s` is not exactly one code point -- confirm.
00733 static int32_t getSingleCP(const UnicodeString& s);
00734
// NOTE(review): presumably the internal worker that stores a string member.
00735 void _add(const UnicodeString& s);
00736
00737 public:
// String-argument bulk operations.
// NOTE(review): presumably these treat `s` as a collection of code points
// rather than as one string element -- confirm.
00745 UnicodeSet& addAll(const UnicodeString& s);
00746
00754 UnicodeSet& retainAll(const UnicodeString& s);
00755
00763 UnicodeSet& complementAll(const UnicodeString& s);
00764
00771 UnicodeSet& removeAll(const UnicodeString& s);
00772
// Static factories returning a UnicodeSet pointer.
// NOTE(review): presumably heap-allocated and owned by the caller;
// createFrom presumably treats `s` as one element, createFromAll as its
// individual code points -- confirm.
00781 static UnicodeSet* createFrom(const UnicodeString& s);
00782
00783
00791 static UnicodeSet* createFromAll(const UnicodeString& s);
00792
// retain overloads: a range (virtual) and a single code point.
00805 virtual UnicodeSet& retain(UChar32 start, UChar32 end);
00806
00807
00812 UnicodeSet& retain(UChar32 c);
00813
// remove overloads: a range (virtual), a single code point, a string.
00826 virtual UnicodeSet& remove(UChar32 start, UChar32 end);
00827
00834 UnicodeSet& remove(UChar32 c);
00835
00844 UnicodeSet& remove(const UnicodeString& s);
00845
// complement overloads: whole set (virtual), a range (virtual), a single
// code point, a string.
00852 virtual UnicodeSet& complement(void);
00853
00867 virtual UnicodeSet& complement(UChar32 start, UChar32 end);
00868
00875 UnicodeSet& complement(UChar32 c);
00876
00885 UnicodeSet& complement(const UnicodeString& s);
00886
// ---- Set-with-set operations (all virtual) ----
// NOTE(review): by name these are union, intersection, difference and
// symmetric difference with `c` -- confirm.
00898 virtual UnicodeSet& addAll(const UnicodeSet& c);
00899
00910 virtual UnicodeSet& retainAll(const UnicodeSet& c);
00911
00922 virtual UnicodeSet& removeAll(const UnicodeSet& c);
00923
00933 virtual UnicodeSet& complementAll(const UnicodeSet& c);
00934
// NOTE(review): presumably removes every element.
00940 virtual UnicodeSet& clear(void);
00941
// ---- Range iteration ----
// NOTE(review): presumably valid indexes for getRangeStart/getRangeEnd are
// 0 .. getRangeCount()-1 -- confirm.
00949 virtual int32_t getRangeCount(void) const;
00950
00958 virtual UChar32 getRangeStart(int32_t index) const;
00959
00967 virtual UChar32 getRangeEnd(int32_t index) const;
00968
// Takes a uint16_t destination buffer with its capacity and a UErrorCode
// in/out argument; returns an int32_t.
// NOTE(review): presumably the number of uint16_t units written or required,
// with overflow reported through `ec` -- confirm.
01017 int32_t serialize(uint16_t *dest, int32_t destCapacity, UErrorCode& ec) const;
01018
// NOTE(review): presumably releases unused internal storage.
01024 virtual UnicodeSet& compact();
01025
// Class identification. The static ID is the address of the private static
// member fgClassID cast to UClassID.
01037 static UClassID getStaticClassID(void) { return (UClassID)&fgClassID; }
01038
// Virtual counterpart; for this class it returns getStaticClassID().
01047 virtual UClassID getDynamicClassID(void) const { return getStaticClassID(); }
01048
01049 private:
01050
01051
01052
// USetAccess is granted access to private members, including the two string
// accessors declared next.
01053 friend class USetAccess;
01054
// NOTE(review): presumably the number of entries in `strings`.
01055 int32_t getStringCount() const;
01056
// NOTE(review): presumably the `index`-th entry of `strings`; ownership and
// out-of-range behavior are not visible here.
01057 const UnicodeString* getString(int32_t index) const;
01058
01059 private:
01060
// Its address is what getStaticClassID() returns; the header does not show
// its value being read.
01061 static const char fgClassID;
01062
01063
01064
01065
01066
// Classes granted access to the private interface below.
01067 friend class Transliterator;
01068 friend class TransliteratorParser;
01069 friend class TransliteratorIDParser;
01070 friend class TransliterationRule;
01071
01072 friend class RBBIRuleScanner;
01073 friend class RegexCompile;
01074
// Private constructors that parse a pattern using a ParsePosition passed by
// non-const reference; the first overload also takes a SymbolTable.
// NOTE(review): presumably parsing starts at `pos` and `pos` is advanced
// past the parsed pattern -- confirm.
01093 UnicodeSet(const UnicodeString& pattern, ParsePosition& pos,
01094 const SymbolTable& symbols,
01095 UErrorCode& status);
01096
01102 UnicodeSet(const UnicodeString& pattern, ParsePosition& pos,
01103 UErrorCode& status);
01104
// Virtual, private.
// NOTE(review): presumably overrides a base-class hook that tests an 8-bit
// index value `v` -- confirm against UnicodeFilter's hierarchy.
01110 virtual UBool matchesIndexValue(uint8_t v) const;
01111
01112 private:
01113
01114
01115
01116
01117
// Private pattern application taking a ParsePosition and a SymbolTable
// pointer. NOTE(review): `symbols` is a pointer, so presumably may be null.
01142 void applyPattern(const UnicodeString& pattern,
01143 ParsePosition& pos,
01144 const SymbolTable* symbols,
01145 UErrorCode& status);
01146
01147
01148
01149
01150
// ---- Storage management helpers ----
// NOTE(review): presumably grow `list` / `buffer` to hold at least `newLen`
// entries; failure reporting is not visible (both return void).
01151 void ensureCapacity(int32_t newLen);
01152
01153 void ensureBufferCapacity(int32_t newLen);
01154
// NOTE(review): presumably exchanges `list` and `buffer` together with their
// capacities.
01155 void swapBuffers(void);
01156
// NOTE(review): presumably creates the `strings` vector and returns whether
// that succeeded.
01157 UBool allocateStrings();
01158
// ---- Pattern parsing / generation helpers ----
// Like the private applyPattern, with an additional UnicodeString passed by
// non-const reference (`rebuiltPat`).
// NOTE(review): presumably receives a rebuilt form of the parsed pattern.
01159 void _applyPattern(const UnicodeString& pattern,
01160 ParsePosition& pos,
01161 const SymbolTable* symbols,
01162 UnicodeString& rebuiltPat,
01163 UErrorCode& status);
01164
// NOTE(review): presumably workers behind toPattern(); the difference
// between the two is not visible in this header.
01165 UnicodeString& _toPattern(UnicodeString& result,
01166 UBool escapeUnprintable) const;
01167
01168 UnicodeString& _generatePattern(UnicodeString& result,
01169 UBool escapeUnprintable) const;
01170
// Static helpers taking a UnicodeString `buf` by non-const reference plus a
// string or a single code point.
// NOTE(review): presumably append to `buf`, escaping as needed -- confirm.
01171 static void _appendToPat(UnicodeString& buf, const UnicodeString& s, UBool escapeUnprintable);
01172
01173 static void _appendToPat(UnicodeString& buf, UChar32 c, UBool escapeUnprintable);
01174
01175
01176
01177
01178
// ---- Low-level array operations ----
// Each takes a UChar32 array with its length and an 8-bit `polarity`.
// NOTE(review): presumably `other` uses the same layout as `list`; the
// meaning of the `polarity` values is not visible here -- confirm.
01179 void exclusiveOr(const UChar32* other, int32_t otherLen, int8_t polarity);
01180
01181 void add(const UChar32* other, int32_t otherLen, int8_t polarity);
01182
01183 void retain(const UChar32* other, int32_t otherLen, int8_t polarity);
01184
// ---- Property pattern support ----
// NOTE(review): presumably the property-pattern counterpart of
// resemblesPattern().
01190 static UBool resemblesPropertyPattern(const UnicodeString& pattern,
01191 int32_t pos);
01192
// NOTE(review): presumably parses a property pattern starting at `ppos`.
01231 UnicodeSet& applyPropertyPattern(const UnicodeString& pattern,
01232 ParsePosition& ppos,
01233 UErrorCode &ec);
01234
// Filter: pointer to a function taking a code point and an opaque context
// pointer and returning UBool.
01239 typedef UBool (*Filter)(UChar32 codePoint, void* context);
01240
// Takes a Filter, the context pointer to go with it, and a UErrorCode.
// NOTE(review): presumably rebuilds the set from the code points for which
// `filter` returns true -- confirm.
01249 void applyFilter(Filter filter,
01250 void* context,
01251 UErrorCode &status);
01252
// Returns a pointer to a const UnicodeSet.
// NOTE(review): presumably a shared, library-owned set used by applyFilter;
// lifetime and thread-safety are not visible here.
01257 static const UnicodeSet* getInclusions();
01258
// UnicodeSetIterator is granted access to private members.
01259 friend class UnicodeSetIterator;
01260 };
01261
// Inequality test: the logical negation of operator==.
// operator== is declared virtual in the class, so the comparison is
// dispatched on the dynamic type of this object.
// @param o  the set to compare against
// @return   the negation of operator==(o)
01262 inline UBool UnicodeSet::operator!=(const UnicodeSet& o) const {
01263 return !(this->operator==(o));
01264 }
01265
// Range overload of containsSome: the logical negation of
// containsNone(start, end).
// @param start  first code point argument, forwarded unchanged
// @param end    second code point argument, forwarded unchanged
// @return       the negation of containsNone(start, end)
01266 inline UBool UnicodeSet::containsSome(UChar32 start, UChar32 end) const {
01267 return !(this->containsNone(start, end));
01268 }
01269
// Set overload of containsSome: the logical negation of containsNone(s)
// for another UnicodeSet.
// @param s  the other set, forwarded unchanged
// @return   the negation of containsNone(s)
01270 inline UBool UnicodeSet::containsSome(const UnicodeSet& s) const {
01271 return !(this->containsNone(s));
01272 }
01273
// String overload of containsSome: the logical negation of containsNone(s)
// for a UnicodeString.
// @param s  the string, forwarded unchanged
// @return   the negation of containsNone(s)
01274 inline UBool UnicodeSet::containsSome(const UnicodeString& s) const {
01275 return !(this->containsNone(s));
01276 }
01277
01278 U_NAMESPACE_END
01279
01280 #endif