#include <unistr.h>
Inheritance diagram for UnicodeString:
Public Methods | |
UBool | operator== (const UnicodeString& text) const |
Equality operator. More... | |
UBool | operator!= (const UnicodeString& text) const |
Inequality operator. More... | |
UBool | operator> (const UnicodeString& text) const |
Greater than operator. More... | |
UBool | operator< (const UnicodeString& text) const |
Less than operator. More... | |
UBool | operator>= (const UnicodeString& text) const |
Greater than or equal operator. More... | |
UBool | operator<= (const UnicodeString& text) const |
Less than or equal operator. More... | |
int8_t | compare (const UnicodeString& text) const |
Compare the characters bitwise in this UnicodeString to the characters in text . More... | |
int8_t | compare (UTextOffset start, int32_t length, const UnicodeString& srcText) const |
Compare the characters bitwise in the range [start , start + length ) with the characters in srcText . More... | |
int8_t | compare (UTextOffset start, int32_t length, const UnicodeString& srcText, UTextOffset srcStart, int32_t srcLength) const |
Compare the characters bitwise in the range [start , start + length ) with the characters in srcText in the range [srcStart , srcStart + srcLength ). More... | |
int8_t | compare (const UChar *srcChars, int32_t srcLength) const |
Compare the characters bitwise in this UnicodeString with the first srcLength characters in srcChars . More... | |
int8_t | compare (UTextOffset start, int32_t length, const UChar *srcChars) const |
Compare the characters bitwise in the range [start , start + length ) with the first length characters in srcChars . More... | |
int8_t | compare (UTextOffset start, int32_t length, const UChar *srcChars, UTextOffset srcStart, int32_t srcLength) const |
Compare the characters bitwise in the range [start , start + length ) with the characters in srcChars in the range [srcStart , srcStart + srcLength ). More... | |
int8_t | compareBetween (UTextOffset start, UTextOffset limit, const UnicodeString& srcText, UTextOffset srcStart, UTextOffset srcLimit) const |
Compare the characters bitwise in the range [start , limit ) with the characters in srcText in the range [srcStart , srcLimit ). More... | |
int8_t | compareCodePointOrder (const UnicodeString& text) const |
Compare two Unicode strings in code point order. More... | |
int8_t | compareCodePointOrder (UTextOffset start, int32_t length, const UnicodeString& srcText) const |
Compare two Unicode strings in code point order. More... | |
int8_t | compareCodePointOrder (UTextOffset start, int32_t length, const UnicodeString& srcText, UTextOffset srcStart, int32_t srcLength) const |
Compare two Unicode strings in code point order. More... | |
int8_t | compareCodePointOrder (const UChar *srcChars, int32_t srcLength) const |
Compare two Unicode strings in code point order. More... | |
int8_t | compareCodePointOrder (UTextOffset start, int32_t length, const UChar *srcChars) const |
Compare two Unicode strings in code point order. More... | |
int8_t | compareCodePointOrder (UTextOffset start, int32_t length, const UChar *srcChars, UTextOffset srcStart, int32_t srcLength) const |
Compare two Unicode strings in code point order. More... | |
int8_t | compareCodePointOrderBetween (UTextOffset start, UTextOffset limit, const UnicodeString& srcText, UTextOffset srcStart, UTextOffset srcLimit) const |
Compare two Unicode strings in code point order. More... | |
int8_t | caseCompare (const UnicodeString& text, uint32_t options) const |
Compare two strings case-insensitively using full case folding. More... | |
int8_t | caseCompare (UTextOffset start, int32_t length, const UnicodeString& srcText, uint32_t options) const |
Compare two strings case-insensitively using full case folding. More... | |
int8_t | caseCompare (UTextOffset start, int32_t length, const UnicodeString& srcText, UTextOffset srcStart, int32_t srcLength, uint32_t options) const |
Compare two strings case-insensitively using full case folding. More... | |
int8_t | caseCompare (const UChar *srcChars, int32_t srcLength, uint32_t options) const |
Compare two strings case-insensitively using full case folding. More... | |
int8_t | caseCompare (UTextOffset start, int32_t length, const UChar *srcChars, uint32_t options) const |
Compare two strings case-insensitively using full case folding. More... | |
int8_t | caseCompare (UTextOffset start, int32_t length, const UChar *srcChars, UTextOffset srcStart, int32_t srcLength, uint32_t options) const |
Compare two strings case-insensitively using full case folding. More... | |
int8_t | caseCompareBetween (UTextOffset start, UTextOffset limit, const UnicodeString& srcText, UTextOffset srcStart, UTextOffset srcLimit, uint32_t options) const |
Compare two strings case-insensitively using full case folding. More... | |
UBool | startsWith (const UnicodeString& text) const |
Determine if this starts with the characters in text . More... | |
UBool | startsWith (const UnicodeString& srcText, UTextOffset srcStart, int32_t srcLength) const |
Determine if this starts with the characters in srcText in the range [srcStart , srcStart + srcLength ). More... | |
UBool | startsWith (const UChar *srcChars, int32_t srcLength) const |
Determine if this starts with the characters in srcChars . More... | |
UBool | startsWith (const UChar *srcChars, UTextOffset srcStart, int32_t srcLength) const |
Determine if this starts with the characters in srcChars in the range [srcStart , srcStart + srcLength ). More... | |
UBool | endsWith (const UnicodeString& text) const |
Determine if this ends with the characters in text . More... | |
UBool | endsWith (const UnicodeString& srcText, UTextOffset srcStart, int32_t srcLength) const |
Determine if this ends with the characters in srcText in the range [srcStart , srcStart + srcLength ). More... | |
UBool | endsWith (const UChar *srcChars, int32_t srcLength) const |
Determine if this ends with the characters in srcChars . More... | |
UBool | endsWith (const UChar *srcChars, UTextOffset srcStart, int32_t srcLength) const |
Determine if this ends with the characters in srcChars in the range [srcStart , srcStart + srcLength ). More... | |
UTextOffset | indexOf (const UnicodeString& text) const |
Locate in this the first occurrence of the characters in text , using bitwise comparison. More... | |
UTextOffset | indexOf (const UnicodeString& text, UTextOffset start) const |
Locate in this the first occurrence of the characters in text starting at offset start , using bitwise comparison. More... | |
UTextOffset | indexOf (const UnicodeString& text, UTextOffset start, int32_t length) const |
Locate in this the first occurrence in the range [start , start + length ) of the characters in text , using bitwise comparison. More... | |
UTextOffset | indexOf (const UnicodeString& srcText, UTextOffset srcStart, int32_t srcLength, UTextOffset start, int32_t length) const |
Locate in this the first occurrence in the range [start , start + length ) of the characters in srcText in the range [srcStart , srcStart + srcLength ), using bitwise comparison. More... | |
UTextOffset | indexOf (const UChar *srcChars, int32_t srcLength, UTextOffset start) const |
Locate in this the first occurrence of the characters in srcChars starting at offset start , using bitwise comparison. More... | |
UTextOffset | indexOf (const UChar *srcChars, int32_t srcLength, UTextOffset start, int32_t length) const |
Locate in this the first occurrence in the range [start , start + length ) of the characters in srcChars , using bitwise comparison. More... | |
UTextOffset | indexOf (const UChar *srcChars, UTextOffset srcStart, int32_t srcLength, UTextOffset start, int32_t length) const |
Locate in this the first occurrence in the range [start , start + length ) of the characters in srcChars in the range [srcStart , srcStart + srcLength ), using bitwise comparison. More... | |
UTextOffset | indexOf (UChar c) const |
Locate in this the first occurrence of the code point c , using bitwise comparison. More... | |
UTextOffset | indexOf (UChar32 c) const |
Locate in this the first occurrence of the code point c , using bitwise comparison. More... | |
UTextOffset | indexOf (UChar c, UTextOffset start) const |
Locate in this the first occurrence of the code point c starting at offset start , using bitwise comparison. More... | |
UTextOffset | indexOf (UChar32 c, UTextOffset start) const |
Locate in this the first occurrence of the code point c starting at offset start , using bitwise comparison. More... | |
UTextOffset | indexOf (UChar c, UTextOffset start, int32_t length) const |
Locate in this the first occurrence of the code point c in the range [start , start + length ), using bitwise comparison. More... | |
UTextOffset | indexOf (UChar32 c, UTextOffset start, int32_t length) const |
Locate in this the first occurrence of the code point c in the range [start , start + length ), using bitwise comparison. More... | |
UTextOffset | lastIndexOf (const UnicodeString& text) const |
Locate in this the last occurrence of the characters in text , using bitwise comparison. More... | |
UTextOffset | lastIndexOf (const UnicodeString& text, UTextOffset start) const |
Locate in this the last occurrence of the characters in text starting at offset start , using bitwise comparison. More... | |
UTextOffset | lastIndexOf (const UnicodeString& text, UTextOffset start, int32_t length) const |
Locate in this the last occurrence in the range [start , start + length ) of the characters in text , using bitwise comparison. More... | |
UTextOffset | lastIndexOf (const UnicodeString& srcText, UTextOffset srcStart, int32_t srcLength, UTextOffset start, int32_t length) const |
Locate in this the last occurrence in the range [start , start + length ) of the characters in srcText in the range [srcStart , srcStart + srcLength ), using bitwise comparison. More... | |
UTextOffset | lastIndexOf (const UChar *srcChars, int32_t srcLength, UTextOffset start) const |
Locate in this the last occurrence of the characters in srcChars starting at offset start , using bitwise comparison. More... | |
UTextOffset | lastIndexOf (const UChar *srcChars, int32_t srcLength, UTextOffset start, int32_t length) const |
Locate in this the last occurrence in the range [start , start + length ) of the characters in srcChars , using bitwise comparison. More... | |
UTextOffset | lastIndexOf (const UChar *srcChars, UTextOffset srcStart, int32_t srcLength, UTextOffset start, int32_t length) const |
Locate in this the last occurrence in the range [start , start + length ) of the characters in srcChars in the range [srcStart , srcStart + srcLength ), using bitwise comparison. More... | |
UTextOffset | lastIndexOf (UChar c) const |
Locate in this the last occurrence of the code point c , using bitwise comparison. More... | |
UTextOffset | lastIndexOf (UChar32 c) const |
Locate in this the last occurrence of the code point c , using bitwise comparison. More... | |
UTextOffset | lastIndexOf (UChar c, UTextOffset start) const |
Locate in this the last occurrence of the code point c starting at offset start , using bitwise comparison. More... | |
UTextOffset | lastIndexOf (UChar32 c, UTextOffset start) const |
Locate in this the last occurrence of the code point c starting at offset start , using bitwise comparison. More... | |
UTextOffset | lastIndexOf (UChar c, UTextOffset start, int32_t length) const |
Locate in this the last occurrence of the code point c in the range [start , start + length ), using bitwise comparison. More... | |
UTextOffset | lastIndexOf (UChar32 c, UTextOffset start, int32_t length) const |
Locate in this the last occurrence of the code point c in the range [start , start + length ), using bitwise comparison. More... | |
UChar | charAt (UTextOffset offset) const |
Return the code unit at offset offset . More... | |
UChar | operator[] (UTextOffset offset) const |
Return the code unit at offset offset . More... | |
UChar32 | char32At (UTextOffset offset) const |
Return the code point that contains the code unit at offset offset . More... | |
UTextOffset | getCharStart (UTextOffset offset) |
Adjust a random-access offset so that it points to the beginning of a Unicode character. More... | |
UTextOffset | getCharLimit (UTextOffset offset) |
Adjust a random-access offset so that it points behind a Unicode character. More... | |
void | extract (UTextOffset start, int32_t length, UChar *dst, UTextOffset dstStart = 0) const |
Copy the characters in the range [start , start + length ) into the array dst , beginning at dstStart . More... | |
void | extract (UTextOffset start, int32_t length, UnicodeString& target) const |
Copy the characters in the range [start , start + length ) into the UnicodeString target . More... | |
void | extractBetween (UTextOffset start, UTextOffset limit, UChar *dst, UTextOffset dstStart = 0) const |
Copy the characters in the range [start , limit ) into the array dst , beginning at dstStart . More... | |
void | extractBetween (UTextOffset start, UTextOffset limit, UnicodeString& target) const |
Copy the characters in the range [start , limit ) into the UnicodeString target . More... | |
int32_t | extract (UTextOffset start, int32_t startLength, char *target, const char *codepage = 0) const |
Copy the characters in the range [start , start + startLength ) into an array of characters in a specified codepage. More... | |
int32_t | extract (UTextOffset start, int32_t startLength, char *target, uint32_t targetLength, const char *codepage = 0) const |
Copy the characters in the range [start , start + startLength ) into an array of characters in a specified codepage. More... | |
int32_t | length (void) const |
Return the length of the UnicodeString object. More... | |
UBool | empty (void) const |
Determine if this string is empty. More... | |
int32_t | hashCode (void) const |
Generate a hash code for this object. More... | |
UBool | isBogus (void) const |
Determine if this string is still valid. More... | |
UnicodeString& | operator= (const UnicodeString& srcText) |
Assignment operator. More... | |
UnicodeString& | operator= (UChar ch) |
Assignment operator. More... | |
UnicodeString& | operator= (UChar32 ch) |
Assignment operator. More... | |
UnicodeString& | setTo (const UnicodeString& srcText, UTextOffset srcStart, int32_t srcLength) |
Set the text in the UnicodeString object to the characters in srcText in the range [srcStart , srcStart + srcLength ). More... | |
UnicodeString& | setTo (const UnicodeString& srcText) |
Set the text in the UnicodeString object to the characters in srcText . More... | |
UnicodeString& | setTo (const UChar *srcChars, int32_t srcLength) |
Set the characters in the UnicodeString object to the characters in srcChars . More... | |
UnicodeString& | setTo (UChar srcChar) |
Set the characters in the UnicodeString object to the code point srcChar . More... | |
UnicodeString& | setTo (UChar32 srcChar) |
Set the characters in the UnicodeString object to the code point srcChar . More... | |
UnicodeString& | setTo (UBool isTerminated, const UChar *text, int32_t textLength) |
Aliasing setTo() function, analogous to the readonly-aliasing UChar* constructor. More... | |
UnicodeString& | setTo (UChar *buffer, int32_t buffLength, int32_t buffCapacity) |
Aliasing setTo() function, analogous to the writeable-aliasing UChar* constructor. More... | |
UnicodeString& | setCharAt (UTextOffset offset, UChar ch) |
Set the character at the specified offset to the specified character. More... | |
UnicodeString& | operator+= (UChar ch) |
Append operator. More... | |
UnicodeString& | operator+= (UChar32 ch) |
Append operator. More... | |
UnicodeString& | operator+= (const UnicodeString& srcText) |
Append operator. More... | |
UnicodeString& | append (const UnicodeString& srcText, UTextOffset srcStart, int32_t srcLength) |
Append the characters in srcText in the range [srcStart , srcStart + srcLength ) to the UnicodeString object. More... | |
UnicodeString& | append (const UnicodeString& srcText) |
Append the characters in srcText to the UnicodeString object. More... | |
UnicodeString& | append (const UChar *srcChars, UTextOffset srcStart, int32_t srcLength) |
Append the characters in srcChars in the range [srcStart , srcStart + srcLength ) to the UnicodeString object. More... | |
UnicodeString& | append (const UChar *srcChars, int32_t srcLength) |
Append the characters in srcChars to the UnicodeString object. More... | |
UnicodeString& | append (UChar srcChar) |
Append the code point srcChar to the UnicodeString object. More... | |
UnicodeString& | append (UChar32 srcChar) |
Append the code point srcChar to the UnicodeString object. More... | |
UnicodeString& | insert (UTextOffset start, const UnicodeString& srcText, UTextOffset srcStart, int32_t srcLength) |
Insert the characters in srcText in the range [srcStart , srcStart + srcLength ) into the UnicodeString object at offset start . More... | |
UnicodeString& | insert (UTextOffset start, const UnicodeString& srcText) |
Insert the characters in srcText into the UnicodeString object at offset start . More... | |
UnicodeString& | insert (UTextOffset start, const UChar *srcChars, UTextOffset srcStart, int32_t srcLength) |
Insert the characters in srcChars in the range [srcStart , srcStart + srcLength ) into the UnicodeString object at offset start . More... | |
UnicodeString& | insert (UTextOffset start, const UChar *srcChars, int32_t srcLength) |
Insert the characters in srcChars into the UnicodeString object at offset start . More... | |
UnicodeString& | insert (UTextOffset start, UChar srcChar) |
Insert the code point srcChar into the UnicodeString object at offset start . More... | |
UnicodeString& | insert (UTextOffset start, UChar32 srcChar) |
Insert the code point srcChar into the UnicodeString object at offset start . More... | |
UnicodeString& | replace (UTextOffset start, int32_t length, const UnicodeString& srcText, UTextOffset srcStart, int32_t srcLength) |
Replace the characters in the range [start , start + length ) with the characters in srcText in the range [srcStart , srcStart + srcLength ). More... | |
UnicodeString& | replace (UTextOffset start, int32_t length, const UnicodeString& srcText) |
Replace the characters in the range [start , start + length ) with the characters in srcText . More... | |
UnicodeString& | replace (UTextOffset start, int32_t length, const UChar *srcChars, UTextOffset srcStart, int32_t srcLength) |
Replace the characters in the range [start , start + length ) with the characters in srcChars in the range [srcStart , srcStart + srcLength ). More... | |
UnicodeString& | replace (UTextOffset start, int32_t length, const UChar *srcChars, int32_t srcLength) |
Replace the characters in the range [start , start + length ) with the characters in srcChars . More... | |
UnicodeString& | replace (UTextOffset start, int32_t length, UChar srcChar) |
Replace the characters in the range [start , start + length ) with the code point srcChar . More... | |
UnicodeString& | replace (UTextOffset start, int32_t length, UChar32 srcChar) |
Replace the characters in the range [start , start + length ) with the code point srcChar . More... | |
UnicodeString& | replaceBetween (UTextOffset start, UTextOffset limit, const UnicodeString& srcText) |
Replace the characters in the range [start , limit ) with the characters in srcText . More... | |
UnicodeString& | replaceBetween (UTextOffset start, UTextOffset limit, const UnicodeString& srcText, UTextOffset srcStart, UTextOffset srcLimit) |
Replace the characters in the range [start , limit ) with the characters in srcText in the range [srcStart , srcLimit ). More... | |
virtual void | handleReplaceBetween (UTextOffset start, UTextOffset limit, const UnicodeString& text) |
Replace a substring of this object with the given text. More... | |
virtual void | copy (int32_t start, int32_t limit, int32_t dest) |
Copy a substring of this object, retaining attribute (out-of-band) information. More... | |
UnicodeString& | findAndReplace (const UnicodeString& oldText, const UnicodeString& newText) |
Replace all occurrences of characters in oldText with the characters in newText. More... | |
UnicodeString& | findAndReplace (UTextOffset start, int32_t length, const UnicodeString& oldText, const UnicodeString& newText) |
Replace all occurrences of characters in oldText with characters in newText in the range [start , start + length ). More... | |
UnicodeString& | findAndReplace (UTextOffset start, int32_t length, const UnicodeString& oldText, UTextOffset oldStart, int32_t oldLength, const UnicodeString& newText, UTextOffset newStart, int32_t newLength) |
Replace all occurrences of characters in oldText in the range [oldStart , oldStart + oldLength ) with the characters in newText in the range [newStart , newStart + newLength ) in the range [start , start + length ). More... | |
UnicodeString& | remove (void) |
Remove all characters from the UnicodeString object. More... | |
UnicodeString& | remove (UTextOffset start, int32_t length = INT32_MAX) |
Remove the characters in the range [start , start + length ) from the UnicodeString object. More... | |
UnicodeString& | removeBetween (UTextOffset start, UTextOffset limit = INT32_MAX) |
Remove the characters in the range [start , limit ) from the UnicodeString object. More... | |
UBool | padLeading (int32_t targetLength, UChar padChar = 0x0020) |
Pad the start of this UnicodeString with the character padChar . More... | |
UBool | padTrailing (int32_t targetLength, UChar padChar = 0x0020) |
Pad the end of this UnicodeString with the character padChar . More... | |
UBool | truncate (int32_t targetLength) |
Truncate this UnicodeString to the targetLength . More... | |
UnicodeString& | trim (void) |
Trims leading and trailing whitespace from this UnicodeString. More... | |
UnicodeString& | reverse (void) |
Reverse this UnicodeString in place. More... | |
UnicodeString& | reverse (UTextOffset start, int32_t length) |
Reverse the range [start , start + length ) in this UnicodeString. More... | |
UnicodeString& | toUpper (void) |
Convert the characters in this to UPPER CASE following the conventions of the default locale. More... | |
UnicodeString& | toUpper (const Locale& locale) |
Convert the characters in this to UPPER CASE following the conventions of a specific locale. More... | |
UnicodeString& | toLower (void) |
Convert the characters in this to lower case following the conventions of the default locale. More... | |
UnicodeString& | toLower (const Locale& locale) |
Convert the characters in this to lower case following the conventions of a specific locale. More... | |
UnicodeString& | foldCase (uint32_t options=U_FOLD_CASE_DEFAULT) |
Case-fold the characters in this string. More... | |
UnicodeString () | |
Construct an empty UnicodeString. More... | |
UnicodeString (int32_t capacity, UChar32 c, int32_t count) | |
Construct a UnicodeString with capacity to hold capacity UChars. More... | |
UnicodeString (UChar ch) | |
Single UChar (code unit) constructor. More... | |
UnicodeString (UChar32 ch) | |
Single UChar32 (code point) constructor. More... | |
UnicodeString (const UChar *text) | |
UChar* constructor. More... | |
UnicodeString (const UChar *text, int32_t textLength) | |
UChar* constructor. More... | |
UnicodeString (UBool isTerminated, const UChar *text, int32_t textLength) | |
Readonly-aliasing UChar* constructor. More... | |
UnicodeString (UChar *buffer, int32_t buffLength, int32_t buffCapacity) | |
Writeable-aliasing UChar* constructor. More... | |
UnicodeString (const char *codepageData, const char *codepage = 0) | |
char* constructor. More... | |
UnicodeString (const char *codepageData, int32_t dataLength, const char *codepage = 0) | |
char* constructor. More... | |
UnicodeString (const UnicodeString& that) | |
Copy constructor. More... | |
~UnicodeString () | |
Destructor. More... | |
int32_t | numDisplayCells (UTextOffset start = 0, int32_t length = INT32_MAX, UBool asian = TRUE) const |
Returns the number of display cells occupied by the range [start , start + length ). More... | |
UCharReference | operator[] (UTextOffset pos) |
UnicodeString | unescape () const |
Unescape a string of characters and return a string containing the result. More... | |
UChar32 | unescapeAt (int32_t &offset) const |
Unescape a single escape sequence and return the represented character. More... | |
const UChar* | getUChars () const |
Protected Methods | |
virtual UChar | getCharAt (UTextOffset offset) const |
The change in Replaceable to use virtual getCharAt() allows UnicodeString::charAt() to be inline again (see jitterbug 709). | |
virtual UChar32 | getChar32At (UTextOffset offset) const |
The change in Replaceable to use virtual getChar32At() allows UnicodeString::char32At() to be inline again (see jitterbug 709). | |
Private Types | |
enum | { US_STACKBUF_SIZE = 3, kInvalidUChar = 0xffff, kGrowSize = 128, kInvalidHashCode = 0, kEmptyHashCode = 1, kIsBogus = 1, kUsingStackBuffer = 2, kRefCounted = 4, kBufferIsReadonly = 8, kShortString = kUsingStackBuffer, kLongString = kRefCounted, kReadonlyAlias = kBufferIsReadonly, kWriteableAlias = 0 } |
Private Methods | |
int8_t | doCompare (UTextOffset start, int32_t length, const UnicodeString& srcText, UTextOffset srcStart, int32_t srcLength) const |
int8_t | doCompare (UTextOffset start, int32_t length, const UChar *srcChars, UTextOffset srcStart, int32_t srcLength) const |
int8_t | doCompareCodePointOrder (UTextOffset start, int32_t length, const UnicodeString& srcText, UTextOffset srcStart, int32_t srcLength) const |
int8_t | doCompareCodePointOrder (UTextOffset start, int32_t length, const UChar *srcChars, UTextOffset srcStart, int32_t srcLength) const |
int8_t | doCaseCompare (UTextOffset start, int32_t length, const UnicodeString &srcText, UTextOffset srcStart, int32_t srcLength, uint32_t options) const |
int8_t | doCaseCompare (UTextOffset start, int32_t length, const UChar *srcChars, UTextOffset srcStart, int32_t srcLength, uint32_t options) const |
UTextOffset | doIndexOf (UChar c, UTextOffset start, int32_t length) const |
UTextOffset | doLastIndexOf (UChar c, UTextOffset start, int32_t length) const |
void | doExtract (UTextOffset start, int32_t length, UChar *dst, UTextOffset dstStart) const |
void | doExtract (UTextOffset start, int32_t length, UnicodeString& target) const |
UChar | doCharAt (UTextOffset offset) const |
UnicodeString& | doReplace (UTextOffset start, int32_t length, const UnicodeString& srcText, UTextOffset srcStart, int32_t srcLength) |
UnicodeString& | doReplace (UTextOffset start, int32_t length, const UChar *srcChars, UTextOffset srcStart, int32_t srcLength) |
UnicodeString& | doReverse (UTextOffset start, int32_t length) |
int32_t | doHashCode (void) const |
UChar* | getArrayStart (void) |
const UChar* | getArrayStart (void) const |
int32_t | getCapacity (void) const |
UBool | allocate (int32_t capacity) |
void | releaseArray () |
void | setToBogus (void) |
void | pinIndices (UTextOffset& start, int32_t& length) const |
void | doCodepageCreate (const char *codepageData, int32_t dataLength, const char *codepage) |
UBool | cloneArrayIfNeeded (int32_t newCapacity = -1, int32_t growCapacity = -1, UBool doCopyArray = TRUE, int32_t **pBufferToDelete = 0, UBool forceClone = FALSE) |
UnicodeString& | caseMap (const Locale& locale, uint32_t options, int32_t toWhichCase) |
int32_t | addRef (void) |
int32_t | removeRef (void) |
int32_t | refCount (void) const |
int32_t | setRefCount (int32_t count) |
Private Attributes | |
int32_t | fCapacity |
UChar* | fArray |
uint16_t | fFlags |
UChar | fStackBuffer [ US_STACKBUF_SIZE ] |
Static Private Methods | |
UBool U_CALLCONV | growBuffer (void *context, UChar **buffer, int32_t *pCapacity, int32_t reqCapacity, int32_t length) |
UConverter* | getDefaultConverter (UErrorCode& status) |
void | releaseDefaultConverter (UConverter *converter) |
Static Private Attributes | |
UConverter* | fgDefaultConverter |
Friends | |
class | UnicodeStringStreamer |
class | UnicodeConverter |
class | StringCharacterIterator |
UnicodeString is a concrete implementation of the abstract class Replaceable (for transliteration).
In ICU, strings are stored and used as UTF-16. This means that a string internally consists of 16-bit Unicode code units.
UTF-16 is a variable-length encoding: A Unicode character may be stored with either one code unit — which is the most common case — or with a matched pair of special code units ("surrogates"). The data type for code units is UChar.
For single-character handling, a Unicode character code point is a scalar value in the range 0..0x10ffff. ICU uses the UChar32 type for code points.
Indexes and offsets into and lengths of strings always count code units, not code points. This is the same as with multi-byte char* strings in traditional string handling. Operations on partial strings typically do not test for code point boundaries. If necessary, the user needs to take care of such boundaries by testing for the code unit values or by using functions like UnicodeString::getCharStart() and UnicodeString::getCharLimit() (or, in C, the equivalent macros UTF_SET_CHAR_START() and UTF_SET_CHAR_LIMIT(), see utf.h).
UnicodeString uses four storage models:
Definition at line 129 of file unistr.h.
|
|
|
Construct an empty UnicodeString.
|
|
Construct a UnicodeString with capacity to hold
|
|
Single UChar32 (code point) constructor.
|
|
Single UChar32 (code point) constructor.
|
|
UChar* constructor.
|
|
UChar* constructor.
|
|
Readonly-aliasing UChar* constructor. The text will be used for the UnicodeString object, but it will not be released when the UnicodeString is destroyed. This has copy-on-write semantics: When the string is modified, then the buffer is first copied into newly allocated memory. The aliased buffer is never modified. In an assignment to another UnicodeString, the text will be aliased again, so that both strings then alias the same readonly-text.
|
|
Writeable-aliasing UChar* constructor. The text will be used for the UnicodeString object, but it will not be released when the UnicodeString is destroyed. This has write-through semantics: For as long as the capacity of the buffer is sufficient, write operations will directly affect the buffer. When more capacity is necessary, then a new buffer will be allocated and the contents copied as with regularly constructed strings. In an assignment to another UnicodeString, the buffer will be copied. The extract(UChar *dst) function detects whether the dst pointer is the same as the string buffer itself and will in this case not copy the contents.
|
|
char* constructor.
|
|
char* constructor.
|
|
Copy constructor.
|
|
Destructor.
|
|
|
|
|
|
Append the code point
|
|
Append the code point
|
|
Append the characters in
|
|
Append the characters in
|
|
Append the characters in
|
|
Append the characters in
|
|
Compare two strings case-insensitively using full case folding. This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
|
|
Compare two strings case-insensitively using full case folding. This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
|
|
Compare two strings case-insensitively using full case folding. This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
|
|
Compare two strings case-insensitively using full case folding. This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)).
|
|
Compare two strings case-insensitively using full case folding. This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)).
|
|
Compare two strings case-insensitively using full case folding. This is equivalent to this->foldCase(options).compare(text.foldCase(options)).
|
|
Compare two strings case-insensitively using full case folding. This is equivalent to this->foldCase(options).compareBetween(text.foldCase(options)).
|
|
|
|
Return the code point that contains the code unit at offset
Reimplemented from Replaceable. |
|
Return the code unit at offset
Reimplemented from Replaceable. Definition at line 3111 of file unistr.h. Referenced by DecimalFormatSymbols::getDecimalSeparator(), DecimalFormatSymbols::getDigit(), DecimalFormatSymbols::getExponentialSymbol(), DecimalFormatSymbols::getGroupingSeparator(), DecimalFormatSymbols::getMinusSign(), DecimalFormatSymbols::getMonetaryDecimalSeparator(), DecimalFormat::getPadCharacter(), DecimalFormatSymbols::getPadEscape(), DecimalFormatSymbols::getPatternSeparator(), DecimalFormatSymbols::getPerMill(), DecimalFormatSymbols::getPercent(), DecimalFormatSymbols::getPlusSign(), DecimalFormatSymbols::getZeroDigit(), UCharReference::operator UChar(), and UCharReference::operator=(). |
|
|
|
Compare the characters bitwise in the range [
|
|
Compare the characters bitwise in the range [
|
|
Compare the characters bitwise in this UnicodeString with the first
|
|
Compare the characters bitwise in the range [
|
|
Compare the characters bitwise in the range [
|
|
Compare the characters bitwise in this UnicodeString to the characters in
Definition at line 2539 of file unistr.h. Referenced by DecimalFormatSymbols::compareCurrencySymbol(), DecimalFormatSymbols::compareInfinity(), DecimalFormatSymbols::compareInternationalCurrencySymbol(), DecimalFormatSymbols::compareNaN(), and startsWith(). |
|
Compare the characters bitwise in the range [
|
|
Compare two Unicode strings in code point order. This is different in UTF-16 from how compare(), operator==, startsWith() etc. work if supplementary characters are present: In UTF-16, supplementary characters (with code points U+10000 and above) are stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff, which means that they compare as less than some other BMP characters like U+feff. This function compares Unicode strings in code point order. If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
|
|
Compare two Unicode strings in code point order. This is different in UTF-16 from how compare(), operator==, startsWith() etc. work if supplementary characters are present: In UTF-16, supplementary characters (with code points U+10000 and above) are stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff, which means that they compare as less than some other BMP characters like U+feff. This function compares Unicode strings in code point order. If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
|
|
Compare two Unicode strings in code point order. This is different in UTF-16 from how compare(), operator==, startsWith() etc. work if supplementary characters are present: In UTF-16, supplementary characters (with code points U+10000 and above) are stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff, which means that they compare as less than some other BMP characters like U+feff. This function compares Unicode strings in code point order. If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
|
|
Compare two Unicode strings in code point order. This is different in UTF-16 from how compare(), operator==, startsWith() etc. work if supplementary characters are present: In UTF-16, supplementary characters (with code points U+10000 and above) are stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff, which means that they compare as less than some other BMP characters like U+feff. This function compares Unicode strings in code point order. If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
|
|
Compare two Unicode strings in code point order. This is different in UTF-16 from how compare(), operator==, startsWith() etc. work if supplementary characters are present: In UTF-16, supplementary characters (with code points U+10000 and above) are stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff, which means that they compare as less than some other BMP characters like U+feff. This function compares Unicode strings in code point order. If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
|
|
Compare two Unicode strings in code point order. This is different in UTF-16 from how compare(), operator==, startsWith() etc. work if supplementary characters are present: In UTF-16, supplementary characters (with code points U+10000 and above) are stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff, which means that they compare as less than some other BMP characters like U+feff. This function compares Unicode strings in code point order. If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
|
|
Compare two Unicode strings in code point order. This is different in UTF-16 from how compare(), operator==, startsWith() etc. work if supplementary characters are present: In UTF-16, supplementary characters (with code points U+10000 and above) are stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff, which means that they compare as less than some other BMP characters like U+feff. This function compares Unicode strings in code point order. If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
|
|
Copy a substring of this object, retaining attribute (out-of-band) information. This method is used to duplicate or reorder substrings. The destination index must not overlap the source range.
Reimplemented from Replaceable. |
|
|
|
Definition at line 2723 of file unistr.h. Referenced by caseCompare(), and caseCompareBetween(). |
|
Definition at line 3101 of file unistr.h. Referenced by charAt(), and operator[](). |
|
|
|
|
|
Definition at line 2585 of file unistr.h. Referenced by compare(), compareBetween(), endsWith(), operator<(), operator<=(), operator==(), operator>(), operator>=(), and startsWith(). |
|
|
|
Definition at line 2648 of file unistr.h. Referenced by compareCodePointOrder(), and compareCodePointOrderBetween(). |
|
|
|
Referenced by extract(), and extractBetween(). |
|
Referenced by hashCode(). |
|
Referenced by indexOf(). |
|
Referenced by lastIndexOf(). |
|
|
|
Referenced by append(), insert(), operator+=(), operator=(), remove(), removeBetween(), replace(), replaceBetween(), and setTo(). |
|
Referenced by reverse(). |
|
Determine if this string is empty.
|
|
Determine if this ends with the characters in
|
|
Determine if this ends with the characters in
|
|
Determine if this ends with the characters in
|
|
Determine if this ends with the characters in
|
|
Copy the characters in the range [
|
|
Copy the characters in the range [
|
|
Copy the characters in the range [
|
|
Copy the characters in the range [
If the string aliases to
|
|
Copy the characters in the range [
|
|
Copy the characters in the range [
|
|
Replace all occurrences of characters in oldText in the range [
|
|
Replace all occurrences of characters in oldText with characters in newText in the range [
|
|
Replace all occurrences of characters in oldText with the characters in newText.
Definition at line 3044 of file unistr.h. Referenced by findAndReplace(). |
|
Case-fold the characters in this string. Case-folding is locale-independent and not context-sensitive, but there is an option for whether to include or exclude mappings for dotted I and dotless i that are marked with 'I' in CaseFolding.txt. The result may be longer or shorter than the original.
|
|
|
|
Definition at line 3349 of file unistr.h. Referenced by doCaseCompare(), doCompare(), doCompareCodePointOrder(), indexOf(), and lastIndexOf(). |
|
|
|
The change in Replaceable to use virtual getChar32At() allows UnicodeString::char32At() to be inline again (see jitterbug 709).
Reimplemented from Replaceable. |
|
The change in Replaceable to use virtual getCharAt() allows UnicodeString::charAt() to be inline again (see jitterbug 709).
Reimplemented from Replaceable. |
|
Adjust a random-access offset so that it points behind a Unicode character. The offset that is passed in points behind any code unit of a code point, while the returned offset will point behind the last code unit of the same code point. In UTF-16, if the input offset points behind the first surrogate (i.e., to the second surrogate) of a surrogate pair, then the returned offset will point behind the second surrogate (i.e., to the code unit that follows the pair).
|
|
Adjust a random-access offset so that it points to the beginning of a Unicode character. The offset that is passed in points to any code unit of a code point, while the returned offset will point to the first code unit of the same code point. In UTF-16, if the input offset points to a second surrogate of a surrogate pair, then the returned offset will point to the first surrogate.
|
|
|
|
|
|
|
|
Replace a substring of this object with the given text.
Reimplemented from Replaceable. |
|
Generate a hash code for this object.
|
|
Locate in this the first occurrence of the code point
|
|
Locate in this the first occurrence of the code point
|
|
Locate in this the first occurrence of the code point
|
|
Locate in this the first occurrence of the code point
|
|
Locate in this the first occurrence of the code point
|
|
Locate in this the first occurrence of the code point
|
|
Locate in this the first occurrence in the range [
|
|
Locate in this the first occurrence in the range [
|
|
Locate in this the first occurrence of the characters in
|
|
Locate in this the first occurrence in the range [
|
|
Locate in this the first occurrence in the range [
|
|
Locate in this the first occurrence of the characters in
|
|
Locate in this the first occurrence of the characters in
Definition at line 2741 of file unistr.h. Referenced by indexOf(). |
|
Insert the code point
|
|
Insert the code point
|
|
Insert the characters in
|
|
Insert the characters in
|
|
Insert the characters in
|
|
Insert the characters in
|
|
Determine if this string is still valid.
Definition at line 3323 of file unistr.h. Referenced by doCaseCompare(), doCompare(), doCompareCodePointOrder(), indexOf(), lastIndexOf(), and operator==(). |
|
Locate in this the last occurrence of the code point
|
|
Locate in this the last occurrence of the code point
|
|
Locate in this the last occurrence of the code point
|
|
Locate in this the last occurrence of the code point
|
|
Locate in this the last occurrence of the code point
|
|
Locate in this the last occurrence of the code point
|
|
Locate in this the last occurrence in the range [
|
|
Locate in this the last occurrence in the range [
|
|
Locate in this the last occurrence of the characters in
|
|
Locate in this the last occurrence in the range [
|
|
Locate in this the last occurrence in the range [
|
|
Locate in this the last occurrence of the characters in
|
|
Locate in this the last occurrence of the characters in
Definition at line 2839 of file unistr.h. Referenced by lastIndexOf(). |
|
Return the length of the UnicodeString object. The length is the number of UChar code units in the text (not the number of code points).
Reimplemented from Replaceable. Definition at line 3158 of file unistr.h. Referenced by DecimalFormatSymbols::compareCurrencySymbol(), DecimalFormatSymbols::compareInfinity(), DecimalFormatSymbols::compareInternationalCurrencySymbol(), and DecimalFormatSymbols::compareNaN(). |
|
Returns the number of display cells occupied by the range [
This function is designed for Asian text and properly takes into account halfwidth and fullwidth variants of various CJK characters and the combining behavior of the Hangul Jamo characters (with some limitations; see documentation for Unicode::getCellWidth()).
|
|
Inequality operator. Performs only bitwise comparison.
|
|
Append operator.
Append the characters in
|
|
Append operator.
Append the code point
|
|
Append operator.
Append the code unit
|
|
Less than operator. Performs only bitwise comparison.
|
|
Less than or equal operator. Performs only bitwise comparison.
|
|
Assignment operator.
Replace the characters in this UnicodeString with the code point
|
|
Assignment operator.
Replace the characters in this UnicodeString with the code unit
|
|
Assignment operator.
Replace the characters in this UnicodeString with the characters from
|
|
Equality operator. Performs only bitwise comparison.
Definition at line 2506 of file unistr.h. Referenced by operator!=(). |
|
Greater than operator. Performs only bitwise comparison.
|
|
Greater than or equal operator. Performs only bitwise comparison.
|
|
|
|
Return the code unit at offset
|
|
Pad the start of this UnicodeString with the character padChar. If the length of this UnicodeString is less than targetLength, targetLength - length() copies of padChar will be added to the beginning of this UnicodeString.
|
|
Pad the end of this UnicodeString with the character padChar. If the length of this UnicodeString is less than targetLength, targetLength - length() copies of padChar will be added to the end of this UnicodeString.
|
|
Definition at line 3332 of file unistr.h. Referenced by doCaseCompare(), doCompare(), doCompareCodePointOrder(), indexOf(), and lastIndexOf(). |
|
|
|
|
|
|
|
Remove the characters in the range [
|
|
Remove all characters from the UnicodeString object.
|
|
Remove the characters in the range [
|
|
Definition at line 3372 of file unistr.h. Referenced by releaseArray(). |
|
Replace the characters in the range [
|
|
Replace the characters in the range [
|
|
Replace the characters in the range [
|
|
Replace the characters in the range [
|
|
Replace the characters in the range [
|
|
Replace the characters in the range [
Definition at line 2991 of file unistr.h. Referenced by doExtract(). |
|
Replace the characters in the range [
|
|
Replace the characters in the range [
|
|
Reverse the range [
|
|
Reverse this UnicodeString in place.
|
|
Set the character at the specified offset to the specified character.
Referenced by UCharReference::operator=(). |
|
|
|
Aliasing setTo() function, analogous to the writeable-aliasing UChar* constructor. The text will be used for the UnicodeString object, but it will not be released when the UnicodeString is destroyed. This has write-through semantics: For as long as the capacity of the buffer is sufficient, write operations will directly affect the buffer. When more capacity is necessary, then a new buffer will be allocated and the contents copied as with regularly constructed strings. In an assignment to another UnicodeString, the buffer will be copied. The extract(UChar *dst) function detects whether the dst pointer is the same as the string buffer itself and will in this case not copy the contents.
|
|
Aliasing setTo() function, analogous to the readonly-aliasing UChar* constructor. The text will be used for the UnicodeString object, but it will not be released when the UnicodeString is destroyed. This has copy-on-write semantics: When the string is modified, then the buffer is first copied into newly allocated memory. The aliased buffer is never modified. In an assignment to another UnicodeString, the text will be aliased again, so that both strings then alias the same readonly-text.
|
|
Set the characters in the UnicodeString object to the code point
|
|
Set the characters in the UnicodeString object to the code point
|
|
Set the characters in the UnicodeString object to the characters in
|
|
Set the text in the UnicodeString object to the characters in
|
|
Set the text in the UnicodeString object to the characters in
|
|
|
|
Determine if this starts with the characters in
|
|
Determine if this starts with the characters in
|
|
Determine if this starts with the characters in
|
|
Determine if this starts with the characters in
|
|
Convert the characters in this to lower case following the conventions of a specific locale.
|
|
Convert the characters in this to lower case following the conventions of the default locale.
|
|
Convert the characters in this to UPPER CASE following the conventions of a specific locale.
|
|
Convert the characters in this to UPPER CASE following the conventions of the default locale.
|
|
Trims leading and trailing whitespace from this UnicodeString.
|
|
Truncate this UnicodeString to the
|
|
Unescape a string of characters and return a string containing the result. The following escape sequences are recognized: \uhhhh (4 hex digits; h in [0-9A-Fa-f]), \Uhhhhhhhh (8 hex digits), \xhh (1-2 hex digits), \ooo (1-3 octal digits; o in [0-7]), as well as the standard ANSI C escapes:
\a => U+0007, \b => U+0008, \t => U+0009, \n => U+000A, \v => U+000B, \f => U+000C, \r => U+000D, \" => U+0022, \' => U+0027, \? => U+003F, \\ => U+005C. Anything else following a backslash is generically escaped. For example, "[a\-z]" returns "[a-z]". If an escape sequence is ill-formed, this method returns an empty string. An example of an ill-formed sequence is "\u" followed by fewer than 4 hex digits. This function is similar to u_unescape() but not identical to it. The latter takes a source char*, so it does escape recognition and also invariant conversion.
|
|
Unescape a single escape sequence and return the represented character. See unescape() for a listing of the recognized escape sequences. The character at offset-1 is assumed (without checking) to be a backslash. If the escape sequence is ill-formed, or the offset is out of range, (UChar32)0xFFFFFFFF is returned.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|