Main Page   Class Hierarchy   Alphabetical List   Compound List   File List   Compound Members   File Members   Search  

regex.h

Go to the documentation of this file.
00001 /*
00002 **********************************************************************
00003 *   Copyright (C) 2002, International Business Machines
00004 *   Corporation and others.  All Rights Reserved.
00005 **********************************************************************
00006 */
00007 //
00008 //   file:   regex.h
00009 //
00010 //           ICU Regular Expressions, API for C++
00011 //
00012 
00013 #ifndef REGEX_H
00014 #define REGEX_H
00015 
00016 
00036 #include "unicode/utypes.h"
00037 
00038 #if !UCONFIG_NO_REGULAR_EXPRESSIONS
00039 
00040 #include "unicode/uobject.h"
00041 #include "unicode/unistr.h"
00042 #include "unicode/parseerr.h"
00043 
00044 U_NAMESPACE_BEGIN
00045 
00046 // Forward Declarations... (types this header uses only through pointers, references or friendship)
00047 class RegexMatcher;   // declared in full further down in this header
00048 class UVector;        // pointee type of fCompiledPat, fSets, fCaptureStarts and fCaptureEnds
00049 class UStack;         // pointee type of fBackTrackStack
00050 class UnicodeSet;     // pointee type of fStaticSets
00051 
00052 
00058 enum {   // Regex option constants. Every value is a distinct single bit; presumably they are OR-ed into the 'flags' argument of RegexPattern::compile() -- TODO confirm.
00060         UREGEX_CANON_EQ         = 128, // 0x80 -- presumably requests canonical-equivalence matching; confirm against the ICU docs
00062         UREGEX_CASE_INSENSITIVE = 2,  // 0x02 -- presumably requests case-insensitive matching; confirm
00064         UREGEX_COMMENTS         = 4,  // 0x04 -- presumably allows white space and comments inside patterns; confirm
00067         UREGEX_DOTALL           = 32,  // 0x20 -- presumably lets '.' match line terminators; confirm
00072         UREGEX_MULTILINE        = 8  // 0x08 -- presumably makes '^' and '$' match at line boundaries; confirm
00073 };
00074 
00075 
00076 
00088 class U_I18N_API RegexPattern: public UObject {   // Holds a pattern string, its compile flags and its compiled form (see the private data members).
00089 public:
00090     // Default constructor.
00098     RegexPattern();
00099 
00100     // Copy constructor.
00106     RegexPattern(const RegexPattern &source);
00107     // Virtual destructor.
00113     virtual ~RegexPattern();
00114     // Equality test against another RegexPattern; the comparison criteria are in the implementation, not in this header.
00123     UBool                  operator==(const RegexPattern& that) const;
00124     // Inequality: inline negation of operator==.  NOTE(review): the ';' after the function body is redundant.
00133     inline UBool           operator!=(const RegexPattern& that) const {return ! operator ==(that);};
00134     // Copy assignment; returns a RegexPattern reference.
00140     RegexPattern  &operator =(const RegexPattern &source);
00141     // clone(): returns a RegexPattern pointer.  Presumably a new copy owned by the caller -- TODO confirm.
00149     virtual RegexPattern  *clone() const;
00150 
00151     // compile(): static factory from pattern text; reports through pe and status.  Ownership of the result is presumably the caller's -- TODO confirm.
00172     static RegexPattern *compile( const UnicodeString &regex,
00173         UParseError          &pe,
00174         UErrorCode           &status); 
00175     // compile() overload taking a uint32_t 'flags' (presumably a bitwise OR of the UREGEX_* constants -- TODO confirm).
00196     static RegexPattern *compile( const UnicodeString &regex,
00197         uint32_t             flags,
00198         UParseError          &pe,
00199         UErrorCode           &status); 
00200 
00201     // flags(): returns a uint32_t; presumably the value kept in fFlags -- confirm in the implementation.
00207     virtual uint32_t flags() const;
00208     
00209    /**
00210     *  Creates a RegexMatcher that will match the given input against this pattern.  The
00211     *   RegexMatcher can then be used to perform match, find or replace operations
00212     *   on the input.  Note that a RegexPattern object must not be deleted while
00213     *   RegexMatchers created from it still exist and might possibly be used again.
00214     *
00215     *   @param input The input string to which the regular expression will be applied.
00216     *   @param status   A reference to a UErrorCode to receive any errors.
00217     *   @return      A RegexMatcher object for this pattern and input.
00218     *
00219     *   @draft ICU 2.4
00220     */
00221     virtual RegexMatcher *matcher(const UnicodeString &input,
00222         UErrorCode          &status) const;
00223     
00224     // matches(): static; takes pattern text and input text and returns UBool.  Presumably compiles and matches in one step -- TODO confirm.
00239     static UBool matches(const UnicodeString   &regex,
00240         const UnicodeString   &input,
00241         UParseError     &pe,
00242         UErrorCode      &status); 
00243     
00244     // pattern(): returns a UnicodeString by value; presumably a copy of fPattern -- confirm in the implementation.
00249     virtual UnicodeString pattern() const;
00250     
00251     // split(): takes the input, a caller-supplied array 'dest' and its capacity; returns an int32_t (presumably the number of fields stored -- TODO confirm).
00270     virtual int32_t  split(const UnicodeString &input,
00271         UnicodeString    dest[],
00272         int32_t          destCapacity,
00273         UErrorCode       &status) const;
00274     
00275     
00276     // dump(): non-virtual and const.  Presumably a debugging aid -- TODO confirm.
00281     void dump() const;
00282     // Returns the same value as getStaticClassID().
00288     virtual inline UClassID getDynamicClassID() const { return getStaticClassID(); }
00289     // Returns the address of the static member fgClassID, cast to UClassID.
00295     static inline UClassID getStaticClassID() { return (UClassID)&fgClassID; }
00296     
00297 private:
00298     //
00299     //  Implementation Data
00300     //
00301     UnicodeString   fPattern;      // The original pattern string.
00302     uint32_t        fFlags;        // The flags used when compiling the pattern.
00303                                    //   
00304     UVector         *fCompiledPat; // The compiled pattern.
00305     UnicodeString   fLiteralText;  // Any literal string data from the pattern, 
00306                                    //   after un-escaping, for use during the match.
00307     UVector         *fSets;        // Any UnicodeSets referenced from the pattern.
00308     UBool           fBadState;     // True if some prior error has left this
00309                                    //  RegexPattern in an unusable state.
00310 
00311     RegexMatcher    *fMatcher;     // A cached matcher for this pattern, used for
00312                                    //  split(), to avoid having to
00313                                    //  make new ones on each call.
00314 
00315     int32_t         fNumCaptureGroups;   // presumably the number of capture groups in the pattern -- TODO confirm
00316     int32_t         fMaxCaptureDigits;   // NOTE(review): meaning is not evident from this header -- confirm in the implementation
00317 
00318     UnicodeSet    **fStaticSets;  // Ptr to static (shared) sets for predefined
00319                                     //   regex character classes, e.g. Word.
00320     // Static member whose address getStaticClassID() returns as the class ID.
00325     static const char fgClassID;
00326     // RegexCompile and RegexMatcher are granted access to the private members.
00327     friend class RegexCompile;
00328     friend class RegexMatcher;
00329 
00330     //
00331     //  Implementation Methods
00332     //
00333     void        init();            // Common initialization, for use by constructors.
00334     void        zap();             // Common cleanup
00335     void        dumpOp(int32_t index) const;   // presumably a per-operation helper for dump() -- TODO confirm
00336 
00337 
00338 
00339 };
00340 
00341 
00342 
00343 
00344 
00345 
00346 
00347 
00348 
00358   class U_I18N_API RegexMatcher: public UObject {   // Matcher tied to one RegexPattern and one input string (see fPattern and fInput).
00359 public:
00366     virtual ~RegexMatcher();   // Virtual destructor.
00367 
00368     // matches(): returns UBool, reports errors through status.  Presumably tests the entire input -- TODO confirm.
00375     virtual UBool matches(UErrorCode &status);
00376     
00377     
00378     // lookingAt(): returns UBool, reports errors through status.  Presumably matches only from the start of the input -- TODO confirm.
00391     virtual UBool lookingAt(UErrorCode &status);
00392     
00393     // find(): returns UBool; unlike the overload below it takes no UErrorCode.  Presumably finds the next match -- TODO confirm.
00406     virtual UBool find();
00407     
00408     // find(start, status): overload taking an int32_t 'start' (presumably an index into the input -- TODO confirm).
00418     virtual UBool find(int32_t start, UErrorCode &status); 
00419     
00420     
00421    /**
00422     *   Returns a string containing the text matched by the previous match. 
00423     *   If the pattern can match an empty string, an empty string may be returned.
00424     *   @param   status      A reference to a UErrorCode to receive any errors.  
00425     *                        Possible errors are  U_REGEX_INVALID_STATE if no match
00426     *                        has been attempted or the last match failed. 
00427     *   @return  a string containing the matched input text.  
00428     *   @draft ICU 2.4
00429     */
00430     virtual UnicodeString group(UErrorCode &status) const;
00431     
00432     // group(groupNum, status): returns a UnicodeString by value; presumably the text of capture group groupNum -- TODO confirm.
00445     virtual UnicodeString group(int32_t groupNum, UErrorCode &status) const; 
00446     
00447     // groupCount(): returns an int32_t; presumably the number of capture groups in the pattern -- TODO confirm.
00453     virtual int32_t groupCount() const;
00454     
00455     // start(status): returns an int32_t; presumably the start index of the previous match (cf. fMatchStart) -- TODO confirm.
00463     virtual int32_t start(UErrorCode &status) const;
00464     
00465     // start(group, status): per-group overload.  NOTE(review): 'group' is declared int here, whereas group() above takes int32_t.
00479     virtual int32_t start(int group, UErrorCode &status) const;
00480     
00481     // end(status): returns an int32_t; presumably the index just past the previous match (cf. fMatchEnd) -- TODO confirm.
00491     virtual int32_t end(UErrorCode &status) const;
00492     
00493     // end(group, status): per-group overload.  NOTE(review): 'group' is declared int here, whereas group() above takes int32_t.
00507     virtual int32_t end(int group, UErrorCode &status) const; 
00508     
00509     // reset(): returns a RegexMatcher reference (presumably *this -- TODO confirm).
00518     virtual RegexMatcher &reset();
00519     
00520     // reset(input): overload taking a new input by const reference.  fInput is a pointer, so presumably the caller must keep the string alive -- TODO confirm.
00528     virtual RegexMatcher &reset(const UnicodeString &input);  
00529     
00530     // input(): returns a const reference to a UnicodeString (presumably *fInput -- TODO confirm).
00537     virtual const UnicodeString &input() const; 
00538     
00539     // pattern(): returns a const reference to a RegexPattern (presumably *fPattern -- TODO confirm).
00545     virtual const RegexPattern &pattern() const;
00546     
00547     // replaceAll(): takes the replacement text, returns a UnicodeString by value, reports errors through status.
00564     virtual UnicodeString replaceAll(const UnicodeString &replacement, UErrorCode &status); 
00565     
00566     // replaceFirst(): same signature shape as replaceAll(); presumably replaces only the first match -- TODO confirm.
00587     virtual UnicodeString replaceFirst(const UnicodeString &replacement, UErrorCode &status); 
00588     // appendReplacement(): writes into the caller's 'dest' and returns a RegexMatcher reference (presumably *this -- TODO confirm).
00616     virtual RegexMatcher &appendReplacement(UnicodeString &dest,
00617         const UnicodeString &replacement, UErrorCode &status);
00618     
00619     // appendTail(): takes the caller's 'dest' and returns a UnicodeString reference; it takes no UErrorCode.
00630     virtual UnicodeString &appendTail(UnicodeString &dest); 
00631     
00632     // Returns the same value as getStaticClassID().
00638     virtual inline UClassID getDynamicClassID() const { return getStaticClassID(); }
00639     // Returns the address of the static member fgClassID, cast to UClassID.
00645     static inline UClassID getStaticClassID() { return (UClassID)&fgClassID; }
00646     
00647 private:
00648     // Constructors and other object boilerplate are private.
00649     // Instances of RegexMatcher can not be assigned, copied, cloned, etc.
00650     // Creation by users is only through the factory method in class RegexPattern
00651     RegexMatcher(const RegexPattern *pat); 
00652     RegexMatcher(const RegexMatcher &other);
00653     RegexMatcher &operator =(const RegexMatcher &rhs);
00654     friend class RegexPattern;
00655 
00656 
00657     //
00658     //  MatchAt   This is the internal interface to the match engine itself.
00659     //            Match status comes back in matcher member variables.
00660     //
00661     void         MatchAt(int32_t startIdx, UErrorCode &status);   
00662     inline  void backTrack(int32_t &inputIdx, int32_t &patIdx);   // presumably restores a saved position from fBackTrackStack -- TODO confirm
00663     UBool        isWordBoundary(int32_t pos);         // perform the \b test
00664 
00665 
00666     const RegexPattern  *fPattern;         // presumably the pattern passed to the constructor -- TODO confirm
00667     const UnicodeString *fInput;           // presumably the current input string; held by pointer -- TODO confirm
00668     int32_t              fInputLength;     // presumably the length of the current input -- TODO confirm
00669     UBool                fMatch;           // True if the last match was successful.
00670     int32_t              fMatchStart;      // Position of the start of the most recent match
00671     int32_t              fMatchEnd;        // First position after the end of the most recent match
00672     int32_t              fLastMatchEnd;    // First position after the end of the previous match.
00673     UStack              *fBackTrackStack;  // presumably saved states for backTrack() -- TODO confirm
00674     UVector             *fCaptureStarts;   // presumably per-group start positions -- TODO confirm
00675     UVector             *fCaptureEnds;     // presumably per-group end positions -- TODO confirm
00676     // Static member whose address getStaticClassID() returns as the class ID.
00681     static const char   fgClassID;
00682 
00683 
00684 };  
00685 
00686 U_NAMESPACE_END
00687 #endif  // UCONFIG_NO_REGULAR_EXPRESSIONS
00688 #endif

Generated on Wed Dec 18 16:49:44 2002 for ICU 2.4 by doxygen 1.2.11.1 written by Dimitri van Heesch, © 1997-2001