Main Page   Class Hierarchy   Alphabetical List   Compound List   File List   Compound Members   File Members   Search  

uniset.h

Go to the documentation of this file.
00001 /*
00002 **********************************************************************
00003 * Copyright (C) 1999, International Business Machines Corporation and others. All Rights Reserved.
00004 **********************************************************************
00005 *   Date        Name        Description
00006 *   10/20/99    alan        Creation.
00007 **********************************************************************
00008 */
00009 
00010 #ifndef UNICODESET_H
00011 #define UNICODESET_H
00012 
00013 #include "unicode/unifilt.h"
00014 #include "unicode/utypes.h"
00015 #include "unicode/unistr.h"
00016 
00017 U_NAMESPACE_BEGIN
00018 
00019 class ParsePosition;
00020 class SymbolTable;
00021 class TransliterationRuleParser;
00022 class TransliterationRule;
00023 class Transliterator;
00024 class TransliteratorParser;
00025 class UVector;
00026 
00219 class U_COMMON_API UnicodeSet : public UnicodeFilter {
          // A UnicodeFilter subclass that models a set of code points (UChar32)
          // together with a separate collection of strings. Only declarations
          // are present in this listing, so remarks marked NOTE(review) are
          // inferred from names/signatures and should be confirmed against the
          // implementation file.
00220 
          // Storage for the code point list and a secondary work buffer.
00221     int32_t len; // length of list used; 0 <= len <= capacity
00222     int32_t capacity; // capacity of list
00223     int32_t bufferCapacity; // capacity of buffer
00224     UChar32* list; // MUST be terminated with HIGH
00225     UChar32* buffer; // internal buffer, may be NULL
00226 
00227     UVector* strings; // maintained in sorted order
00228 
          // NOTE(review): presumably the pattern text associated with this set
          // (see applyPattern()/toPattern()) - confirm in the implementation.
00238     UnicodeString pat;
00239 
00240 public:
00241 
          // Static constant declared here and defined out of line.
          // NOTE(review): presumably the smallest code point a set may hold.
00245     static const UChar32 MIN_VALUE;
00246 
          // Static constant declared here and defined out of line.
          // NOTE(review): presumably the largest code point a set may hold.
00250     static const UChar32 MAX_VALUE;
00251 
00252     //----------------------------------------------------------------
00253     // Constructors &c
00254     //----------------------------------------------------------------
00255 
00256 public:
00257 
          // Default constructor. NOTE(review): presumably yields an empty set.
00262     UnicodeSet();
00263 
          // Constructs a set from the code point range given by start and end.
          // NOTE(review): whether `end` is inclusive is not visible here.
00271     UnicodeSet(UChar32 start, UChar32 end);
00272 
          // Constructs a set from a pattern string; `status` is the UErrorCode
          // in/out parameter through which failures are expected to be reported.
00281     UnicodeSet(const UnicodeString& pattern,
00282                UErrorCode& status);
00283 
          // Constructs a set from a category value.
          // NOTE(review): presumably a Unicode general category code - confirm.
00290     UnicodeSet(int8_t category, UErrorCode& status);
00291 
          // Copy constructor.
00296     UnicodeSet(const UnicodeSet& o);
00297 
          // Virtual destructor, so instances can be destroyed through a base pointer.
00302     virtual ~UnicodeSet();
00303 
          // Copy assignment from another set.
00308     UnicodeSet& operator=(const UnicodeSet& o);
00309 
          // Equality comparison with another UnicodeSet (virtual).
00321     virtual UBool operator==(const UnicodeSet& o) const;
00322 
          // Inequality; defined inline after the class as the negation of operator==.
00328     UBool operator!=(const UnicodeSet& o) const;
00329 
          // Returns a copy of this object typed as UnicodeFunctor*.
          // NOTE(review): raw pointer return - caller presumably owns the result.
00336     virtual UnicodeFunctor* clone() const;
00337 
          // Returns a hash code for this set.
          // NOTE(review): expected to be consistent with operator== - confirm.
00345     virtual int32_t hashCode(void) const;
00346 
00347     //----------------------------------------------------------------
00348     // Public API
00349     //----------------------------------------------------------------
00350 
          // Sets this object from the range given by start and end and returns a
          // UnicodeSet reference (presumably *this, to allow chaining - confirm).
00359     UnicodeSet& set(UChar32 start, UChar32 end);
00360 
          // Static check of whether `pattern`, examined at index `pos`, looks
          // like a set pattern. NOTE(review): exact criteria not visible here.
00365     static UBool resemblesPattern(const UnicodeString& pattern,
00366                                   int32_t pos);
00367 
          // Re-initializes this set from a pattern string; errors go to `status`.
00377     virtual UnicodeSet& applyPattern(const UnicodeString& pattern,
00378                                      UErrorCode& status);
00379 
          // Writes a pattern representation of this set into `result` and returns
          // a UnicodeString reference. `escapeUnprintable` defaults to FALSE.
00392     virtual UnicodeString& toPattern(UnicodeString& result,
00393                                      UBool escapeUnprintable = FALSE) const;
00394 
          // Number of elements in the set.
          // NOTE(review): whether strings are counted is not visible here.
00402     virtual int32_t size(void) const;
00403 
          // Whether the set has no elements.
00410     virtual UBool isEmpty(void) const;
00411 
          // Membership test for a single code point.
00418     virtual UBool contains(UChar32 c) const;
00419     
          // Membership test for a whole range of code points.
00428     virtual UBool contains(UChar32 start, UChar32 end) const;
00429 
          // Membership test for a string.
00436     UBool contains(const UnicodeString& s) const;
00437     
          // containsAll / containsNone / containsSome: tests of this set against
          // another set, a string, or a code point range.
          // NOTE(review): for the string overloads, presumably each code point
          // of `s` is tested individually - confirm.
00444     virtual UBool containsAll(const UnicodeSet& c) const;
00445     
00452     UBool containsAll(const UnicodeString& s) const;
00453     
00461     UBool containsNone(UChar32 start, UChar32 end) const;
00462 
00469     UBool containsNone(const UnicodeSet& c) const;
00470     
00477     UBool containsNone(const UnicodeString& s) const;
00478         
          // The containsSome overloads are defined inline after the class, each
          // as the negation of the containsNone overload with the same arguments.
00486     inline UBool containsSome(UChar32 start, UChar32 end) const;
00487         
00494     inline UBool containsSome(const UnicodeSet& s) const;
00495         
00502     inline UBool containsSome(const UnicodeString& s) const;
00503         
          // Matches this set against `text` between `offset` and `limit`.
          // `offset` is taken by non-const reference, so the call may update it.
          // NOTE(review): match semantics and the meaning of `incremental` come
          // from the matcher interface, which is not visible here.
00507     UMatchDegree matches(const Replaceable& text,
00508                          int32_t& offset,
00509                          int32_t limit,
00510                          UBool incremental);
00511 
00512  private:    
          // Static helper taking a text, a [start, limit) style pair of indices
          // and a string. NOTE(review): presumably matches the remainder of `s`
          // against `text`; the meaning of the return value is not visible here.
00534     static int32_t matchRest(const Replaceable& text,
00535                              int32_t start, int32_t limit,
00536                              const UnicodeString& s);
00537     
          // NOTE(review): presumably locates `c` within `list` and returns an
          // index into it - confirm the exact contract in the implementation.
00547     int32_t findCodePoint(UChar32 c) const;
00548 
00549  public:
00550 
          // Adds to `toUnionTo`; this object is not modified (const method).
          // NOTE(review): presumably adds every character this set can match.
00557     void addMatchSetTo(UnicodeSet& toUnionTo) const;
00558 
          // Index-based access to code points.
          // NOTE(review): indexOf/charAt look like inverse operations; behavior
          // for absent characters or out-of-range indices is not visible here.
00566     int32_t indexOf(UChar32 c) const;
00567 
00576     UChar32 charAt(int32_t index) const;
00577 
          // add overloads: insert a range, a single code point, or a string.
          // Each returns a UnicodeSet reference (presumably *this - confirm).
00591     virtual UnicodeSet& add(UChar32 start, UChar32 end);
00592 
00599     UnicodeSet& add(UChar32 c);
00600 
00610     UnicodeSet& add(const UnicodeString& s);
00611 
00612  private:    
          // NOTE(review): presumably returns the code point when `s` holds exactly
          // one, and a sentinel otherwise - confirm.
00618     static int32_t getSingleCP(const UnicodeString& s);
00619 
          // Private string-insertion helper.
          // NOTE(review): presumably used by add(const UnicodeString&).
00620     void _add(const UnicodeString& s);
00621     
00622  public:
          // String forms of the bulk operations.
          // NOTE(review): presumably applied per code point of `s` - confirm.
00629     UnicodeSet& addAll(const UnicodeString& s);
00630 
00637     UnicodeSet& retainAll(const UnicodeString& s);
00638 
00645     UnicodeSet& complementAll(const UnicodeString& s);
00646 
00653     UnicodeSet& removeAll(const UnicodeString& s);
00654 
          // Static factories returning a raw UnicodeSet pointer built from `s`.
          // NOTE(review): ownership presumably passes to the caller; createFrom
          // presumably treats `s` as one string and createFromAll as individual
          // code points - confirm both.
00662     static UnicodeSet* createFrom(const UnicodeString& s);
00663 
00664     
00671     static UnicodeSet* createFromAll(const UnicodeString& s);
00672 
          // retain / remove / complement: range, single code point and string
          // forms, each returning a UnicodeSet reference.
          // NOTE(review): range bounds presumably inclusive - confirm.
00685     virtual UnicodeSet& retain(UChar32 start, UChar32 end);
00686 
00687 
00692     UnicodeSet& retain(UChar32 c);
00693 
00706     virtual UnicodeSet& remove(UChar32 start, UChar32 end);
00707 
00714     UnicodeSet& remove(UChar32 c);
00715 
00723     UnicodeSet& remove(const UnicodeString& s);
00724 
          // Argument-less complement applies to the whole set.
00731     virtual UnicodeSet& complement(void);
00732 
00746     virtual UnicodeSet& complement(UChar32 start, UChar32 end);
00747 
00754     UnicodeSet& complement(UChar32 c);
00755 
00764     UnicodeSet& complement(const UnicodeString& s);
00765 
          // Set-with-set operations; the argument is const and this object is
          // the one modified (non-const methods returning UnicodeSet&).
00777     virtual UnicodeSet& addAll(const UnicodeSet& c);
00778 
00789     virtual UnicodeSet& retainAll(const UnicodeSet& c);
00790 
00801     virtual UnicodeSet& removeAll(const UnicodeSet& c);
00802 
00811     virtual UnicodeSet& complementAll(const UnicodeSet& c);
00812 
          // NOTE(review): presumably removes all elements.
00818     virtual UnicodeSet& clear(void);
00819 
          // Range enumeration: number of ranges and the start/end code point
          // of the range at `index`.
          // NOTE(review): valid index range presumably 0..getRangeCount()-1.
00826     virtual int32_t getRangeCount(void) const;
00827 
00834     virtual UChar32 getRangeStart(int32_t index) const;
00835 
00842     virtual UChar32 getRangeEnd(int32_t index) const;
00843 
          // Serializes this set into the caller-provided uint16_t array `dest`
          // of `destCapacity` units; `ec` carries the error status.
          // NOTE(review): output format and return value (presumably the length
          // written/required) are not visible here.
00891     int32_t serialize(uint16_t *dest, int32_t destCapacity, UErrorCode& ec) const;
00892 
          // NOTE(review): presumably releases unused storage capacity.
00897     virtual UnicodeSet& compact();
00898 
          // Returns the address of the private static fgClassID, cast to UClassID.
00910     static UClassID getStaticClassID(void) { return (UClassID)&fgClassID; }
00911 
          // Virtual counterpart; this class returns getStaticClassID().
00919     virtual UClassID getDynamicClassID(void) const { return getStaticClassID(); };
00920 
00921 private:
00922 
00923     // Private API for the USet API
00924 
          // USetAccess is granted access to the private members below.
00925     friend class USetAccess;
00926 
          // Accessors for the string collection.
          // NOTE(review): presumably count of, and indexed access into, `strings`.
00927     int32_t getStringCount() const;
00928 
00929     const UnicodeString* getString(int32_t index) const;
00930 
00931 private:
00932 
          // Its address is what getStaticClassID() returns.
00933     static const char fgClassID;
00934 
00935     //----------------------------------------------------------------
00936     // RuleBasedTransliterator support
00937     //----------------------------------------------------------------
00938 
          // Classes allowed to use the private constructors and helpers below.
00939     friend class Transliterator;
00940     friend class TransliteratorParser;
00941     friend class TransliteratorIDParser;
00942     friend class TransliterationRule;
00943 
00944     friend class RBBIRuleScanner;
00945 
          // Private constructor: builds a set from `pattern` using a ParsePosition
          // (non-const reference, so it may be updated) and a symbol table.
00964     UnicodeSet(const UnicodeString& pattern, ParsePosition& pos,
00965                const SymbolTable& symbols,
00966                UErrorCode& status);
00967 
          // As above, without a symbol table.
00973     UnicodeSet(const UnicodeString& pattern, ParsePosition& pos,
00974                UErrorCode& status);
00975 
          // NOTE(review): presumably reports whether this set could match text
          // whose index value is `v`; contract comes from a base interface not
          // visible here.
00981     virtual UBool matchesIndexValue(uint8_t v) const;
00982 
00983 private:
00984 
00985     //----------------------------------------------------------------
00986     // Implementation: Pattern parsing
00987     //----------------------------------------------------------------
00988 
          // Private parsing overload. Unlike the public applyPattern it returns
          // void and takes the symbol table by pointer.
          // NOTE(review): `symbols` presumably may be NULL - confirm.
01013     void applyPattern(const UnicodeString& pattern,
01014                       ParsePosition& pos,
01015                       const SymbolTable* symbols,
01016                       UErrorCode& status);
01017 
01018     //----------------------------------------------------------------
01019     // Implementation: Utility methods
01020     //----------------------------------------------------------------
01021 
          // NOTE(review): presumably grow `list` / `buffer` to hold at least
          // `newLen` entries.
01022     void ensureCapacity(int32_t newLen);
01023 
01024     void ensureBufferCapacity(int32_t newLen);
01025 
          // NOTE(review): presumably exchanges `list` and `buffer`.
01026     void swapBuffers(void);
01027 
          // NOTE(review): presumably creates `strings`; the UBool result
          // presumably signals success.
01028     UBool allocateStrings();
01029 
          // Internal parsing worker; additionally receives `rebuiltPat`, a
          // non-const UnicodeString the call can write to.
01030     void _applyPattern(const UnicodeString& pattern,
01031                        ParsePosition& pos,
01032                        const SymbolTable* symbols,
01033                        UnicodeString& rebuiltPat,
01034                        UErrorCode& status);
01035 
          // Internal pattern-generation helpers writing into `result`.
01036     UnicodeString& _toPattern(UnicodeString& result,
01037                               UBool escapeUnprintable) const;
01038 
01039     UnicodeString& _generatePattern(UnicodeString& result,
01040                                     UBool escapeUnprintable) const;
01041 
          // Static helpers that append a string or a single code point to `buf`.
          // NOTE(review): presumably escaping as needed for pattern syntax.
01042     static void _appendToPat(UnicodeString& buf, const UnicodeString& s, UBool escapeUnprintable);
01043 
01044     static void _appendToPat(UnicodeString& buf, UChar32 c, UBool escapeUnprintable);
01045 
01046     //----------------------------------------------------------------
01047     // Implementation: Fundamental operators
01048     //----------------------------------------------------------------
01049 
          // Low-level operations combining this set with a raw UChar32 array of
          // `otherLen` entries. NOTE(review): the meaning of `polarity` and the
          // required format of `other` are not visible here.
01050     void exclusiveOr(const UChar32* other, int32_t otherLen, int8_t polarity);
01051 
01052     void add(const UChar32* other, int32_t otherLen, int8_t polarity);
01053 
01054     void retain(const UChar32* other, int32_t otherLen, int8_t polarity);
01055 
          // UnicodeSetIterator is granted access to the private representation.
01056     friend class UnicodeSetIterator;
01057 };
01058 
01059 inline UBool UnicodeSet::operator!=(const UnicodeSet& o) const {
          // Two sets are unequal exactly when operator== says they are not equal.
01060     return !(this->operator==(o));
01061 }
01062 
01063 inline UBool UnicodeSet::containsSome(UChar32 start, UChar32 end) const {
          // Negation of containsNone() for the same start/end arguments.
01064     return !(this->containsNone(start, end));
01065 }
01066 
01067 inline UBool UnicodeSet::containsSome(const UnicodeSet& s) const {
          // Negation of containsNone() for the same set argument.
01068     return !(this->containsNone(s));
01069 }
01070 
01071 inline UBool UnicodeSet::containsSome(const UnicodeString& s) const {
          // Negation of containsNone() for the same string argument.
01072     return !(this->containsNone(s));
01073 }
01074 
01075 U_NAMESPACE_END
01076 
01077 #endif

Generated on Thu Aug 15 14:13:32 2002 for ICU 2.2 by doxygen 1.2.11.1 written by Dimitri van Heesch, © 1997-2001