Main Page Class Hierarchy Alphabetical List Compound List File List Compound Members File Members
RuleBasedCollator Class Reference
The RuleBasedCollator class provides the simple implementation of Collator, using data-driven tables.
More...
#include <tblcoll.h>
Inheritance diagram for RuleBasedCollator:
List of all members.
Public Methods |
| RuleBasedCollator (const UnicodeString& rules, UErrorCode& status) |
| RuleBasedCollator constructor. More...
|
| RuleBasedCollator ( const UnicodeString& rules, ECollationStrength collationStrength, UErrorCode& status) |
| RuleBasedCollator ( const UnicodeString& rules, Normalizer::EMode decompositionMode, UErrorCode& status) |
| RuleBasedCollator ( const UnicodeString& rules, ECollationStrength collationStrength, Normalizer::EMode decompositionMode, UErrorCode& status) |
virtual | ~RuleBasedCollator () |
| Destructor. More...
|
| RuleBasedCollator (const RuleBasedCollator& other) |
| Copy constructor. More...
|
RuleBasedCollator& | operator= (const RuleBasedCollator& other) |
| Assignment operator. More...
|
virtual UBool | operator== (const Collator& other) const |
| Returns true if "other" is the same as "this". More...
|
virtual UBool | operator!= (const Collator& other) const |
| Returns true if "other" is not the same as "this". More...
|
virtual Collator* | clone (void) const |
| Makes a deep copy of the object. More...
|
virtual CollationElementIterator* | createCollationElementIterator (const UnicodeString& source) const |
| Creates a collation element iterator for the source string. More...
|
virtual CollationElementIterator* | createCollationElementIterator (const CharacterIterator& source) const |
| Creates a collation element iterator for the source. More...
|
virtual EComparisonResult | compare ( const UnicodeString& source, const UnicodeString& target) const |
| Compares a range of character data stored in two different strings based on the collation rules. More...
|
virtual EComparisonResult | compare ( const UnicodeString& source, const UnicodeString& target, int32_t length) const |
| Compares a range of character data stored in two different strings based on the collation rules up to the specified length. More...
|
virtual EComparisonResult | compare ( const UChar* source, int32_t sourceLength, const UChar* target, int32_t targetLength) const |
| The comparison function compares the character data stored in two different string arrays. More...
|
virtual EComparisonResult | compareEx ( const UChar* source, int32_t sourceLength, const UChar* target, int32_t targetLength) const |
virtual CollationKey& | getCollationKey ( const UnicodeString& source, CollationKey& key, UErrorCode& status) const |
| Transforms a specified region of the string into a series of characters that can be compared with CollationKey.compare. More...
|
virtual CollationKey& | getCollationKey (const UChar *source, int32_t sourceLength, CollationKey& key, UErrorCode& status) const |
| Transforms a specified region of the string into a series of characters that can be compared with CollationKey.compare. More...
|
virtual CollationKey& | getCollationKeyEx (const UChar *source, int32_t sourceLength, CollationKey& key, UErrorCode& status) const |
virtual int32_t | hashCode (void) const |
| Generates the hash code for the rule-based collation object. More...
|
const UnicodeString& | getRules (void) const |
| Gets the table-based rules for the collation object. More...
|
int32_t | getMaxExpansion (int32_t order) const |
| Return the maximum length of any expansion sequences that end with the specified comparison order. More...
|
virtual UClassID | getDynamicClassID (void) const |
| Returns a unique class ID POLYMORPHICALLY. More...
|
uint8_t* | cloneRuleData (int32_t &length, UErrorCode &status) |
| Returns the binary format of the class's rules. More...
|
UnicodeString | getRules (UColRuleOption delta) |
| Returns current rules. More...
|
virtual void | setAttribute (UColAttribute attr, UColAttributeValue value, UErrorCode &status) |
| Universal attribute setter. More...
|
virtual UColAttributeValue | getAttribute (UColAttribute attr, UErrorCode &status) |
| Universal attribute getter. More...
|
virtual Collator* | safeClone (void) |
| Thread safe cloning operation. More...
|
virtual EComparisonResult | compare (ForwardCharacterIterator &source, ForwardCharacterIterator &target) |
| String compare that uses user supplied character iteration. More...
|
virtual int32_t | getSortKey (const UnicodeString& source, uint8_t *result, int32_t resultLength) const |
| Get the sort key as an array of bytes from an UnicodeString. More...
|
virtual int32_t | getSortKey (const UChar *source, int32_t sourceLength, uint8_t *result, int32_t resultLength) const |
| Get the sort key as an array of bytes from an UChar buffer. More...
|
Static Public Methods |
UClassID | getStaticClassID (void) |
| Returns the class ID for this class. More...
|
Private Methods |
| RuleBasedCollator () |
| Default constructor.
|
int32_t | addExpansion (int32_t anOrder, const UnicodeString &expandChars) |
| Create a new entry in the expansion table that contains the orderings for the given characters. More...
|
void | build ( const UnicodeString& rules, UErrorCode& success) |
| Create a table-based collation object with the given rules. More...
|
void | addComposedChars (void) |
| Add expanding entries for pre-composed unicode characters so that this collator can be used reasonably well with decomposition turned off.
|
void | commit (void) |
| Look up for unmapped values in the expanded character table.
|
int32_t | increment ( Collator::ECollationStrength s, int32_t lastOrder) |
| Increment of the last order based on the collation strength. More...
|
void | addOrder ( UChar ch, int32_t anOrder, UErrorCode& status) |
| Adds a character and its designated order into the collation table. More...
|
void | addExpandOrder (const UnicodeString& groupChars, const UnicodeString& expChars, int32_t anOrder, UErrorCode& status) |
| Adds the expanding string into the collation table, for example, a-umlaut in German. More...
|
void | addContractOrder (const UnicodeString& groupChars, int32_t anOrder, UErrorCode& status) |
| Adds the contracting string into the collation table, for example, ch in Spanish. More...
|
void | addContractOrder (const UnicodeString& groupChars, int32_t anOrder, UBool fwd, UErrorCode& status) |
| Adds the contracting string into the collation table, for example, ch in Spanish. More...
|
int32_t | getContractOrder (const UnicodeString &groupChars) const |
| If the given string has been specified as a contracting string in this collation table, return its ordering, otherwise return UNMAPPED. More...
|
VectorOfPToContractElement* | getContractValues (UChar ch) const |
| Gets the entry of list of the contracting string in the collation table. More...
|
VectorOfPToContractElement* | getContractValues (int32_t index) const |
| Gets the entry of list of the contracting string in the collation table. More...
|
VectorOfInt* | getExpandValueList (int32_t order) const |
| Gets the entry of value list of the expanding string in the collation table at the specified index. More...
|
int32_t | getCharOrder (UChar ch) const |
| Gets the comparison order of a character from the collation table. More...
|
UBool | writeToFile (const char* fileName) const |
| Flattens the given object persistently to a file. More...
|
void | addToCache ( const UnicodeString& key) |
| Add this table collation to the cache. More...
|
| RuleBasedCollator ( const Locale& desiredLocale, UErrorCode& status) |
| RuleBasedCollator constructor. More...
|
void | constructFromRules ( const UnicodeString& rules, UErrorCode& status) |
| Internal constructFromXyz() methods. More...
|
void | constructFromFile ( const Locale& locale, const UnicodeString& localeFileName, UBool tryBinaryFile, UErrorCode& status) |
void | constructFromFile ( const char* fileName, UErrorCode& status) |
void | constructFromCache ( const UnicodeString& key, UErrorCode& status) |
const char* | constructFromBundle (const Locale& fileName, UErrorCode& status) |
int32_t | getStrengthOrder (NormalizerIterator* cursor, UErrorCode status) const |
VectorOfInt* | makeReorderedBuffer (NormalizerIterator* cursor, UChar colFirst, int32_t lastValue, VectorOfInt* lastExpansion) const |
int32_t | strengthOrder (int32_t value) const |
int32_t | nextContractChar (NormalizerIterator *cursor, UChar ch, UErrorCode& status) const |
void* | getSomeMemory (int32_t size) |
Private Attributes |
UBool | isOverIgnore |
UChar | lastChar |
MergeCollation* | mPattern |
UnicodeString | sbuffer |
UnicodeString | tbuffer |
UnicodeString | key |
NormalizerIterator* | cursor1 |
NormalizerIterator* | cursor2 |
UBool | dataIsOwned |
TableCollationData* | data |
Normalizer::EMode | fDefaultDecomp |
void** | fSomeMemory |
int32_t* | fSizes |
int32_t | fAvailableMemory |
int32_t | fUsedMemory |
Static Private Methods |
int32_t | getEntry ( VectorOfPToContractElement* list, const UnicodeString& name, UBool fwd) |
| Gets the comparison order of a character from the collation table. More...
|
char* | createPathName ( const UnicodeString& prefix, const UnicodeString& name, const UnicodeString& suffix) |
| Creates the path name with given information. More...
|
UBool | initMutex (void) |
void | chopLocale (UnicodeString& localeName) |
| Chops off the last portion of the locale name. More...
|
Static Private Attributes |
char | fgClassID |
const int32_t | UNMAPPED |
const int32_t | CHARINDEX |
const int32_t | EXPANDCHARINDEX |
const int32_t | CONTRACTCHARINDEX |
const int32_t | PRIMARYORDERINCREMENT |
const int32_t | MAXIGNORABLE |
const int32_t | SECONDARYORDERINCREMENT |
const int32_t | TERTIARYORDERINCREMENT |
const int32_t | PRIMARYORDERMASK |
const int32_t | SECONDARYORDERMASK |
const int32_t | TERTIARYORDERMASK |
const int32_t | SECONDARYRESETMASK |
const int32_t | IGNORABLEMASK |
const int32_t | PRIMARYDIFFERENCEONLY |
const int32_t | SECONDARYDIFFERENCEONLY |
const int32_t | PRIMARYORDERSHIFT |
const int32_t | SECONDARYORDERSHIFT |
const int32_t | SORTKEYOFFSET |
const int32_t | CONTRACTCHAROVERFLOW |
const int32_t | COLELEMENTSTART |
const int32_t | PRIMARYLOWZEROMASK |
const int32_t | RESETSECONDARYTERTIARY |
const int32_t | RESETTERTIARY |
const int32_t | IGNORABLE |
const int32_t | PRIMIGNORABLE |
const int32_t | SECIGNORABLE |
const int32_t | TERIGNORABLE |
const int16_t | FILEID |
UnicodeString | DEFAULTRULES |
const char* | kFilenameSuffix |
UBool | isMutexInited |
UMTX | collMutex |
UChar | cacheKey |
Friends |
class | RuleBasedCollatorStreamer |
class | CollationElementIterator |
class | Collator |
int32_t | ucol_getNextCE (const UCollator *coll, collIterate *source, UErrorCode *status) |
int32_t | ucol_getIncrementalCE (const UCollator *coll, incrementalContext *source, UErrorCode *status) |
int32_t | getComplicatedCE (const UCollator *coll, collIterate *source, UErrorCode *status) |
int32_t | ucol_calcSortKey (const UCollator *coll, const UChar *source, int32_t sourceLength, uint8_t **result, int32_t resultLength, UBool allocatePrimary) |
UCollationResult | ucol_strcoll ( const UCollator *coll, const UChar *source, int32_t sourceLength, const UChar *target, int32_t targetLength) |
| Compare two strings. More...
|
int32_t | ucol_getSortKeySize (const UCollator *coll, collIterate *s, int32_t currentSize, UColAttributeValue strength, int32_t len) |
void* | ucol_getABuffer (const UCollator *coll, uint32_t size) |
Detailed Description
The RuleBasedCollator class provides the simple implementation of Collator, using data-driven tables.
The user can create a customized table-based collation.
RuleBasedCollator maps characters to collation keys.
Table Collation has the following restrictions for efficiency (other subclasses may be used for more complex languages) :
1. If the French secondary ordering is specified in a collation object, it is applied to the whole object.
2. All non-mentioned Unicode characters are at the end of the collation order.
3. Private use characters are treated as identical. The private use area in Unicode is 0xE800-0xF8FF.
The collation table is composed of a list of collation rules, where each rule takes one of three forms:
<modifier >
<relation > < text-argument >
<reset > < text-argument >
The following demonstrates how to create your own collation rules:
- Text Argument: A text argument is any sequence of characters, excluding special characters (that is, whitespace characters and the characters used in modifier, relation and reset). If those characters are desired, you can put them in single quotes (e.g. ampersand => '&').
- Modifier: There is a single modifier, which is used to specify that all secondary differences are sorted backwards.
'@' : Indicates that secondary differences, such as accents, are sorted backwards, as in French.
- Relation: The relations are the following:
- '<' : Greater, as a letter difference (primary)
- ';' : Greater, as an accent difference (secondary)
- ',' : Greater, as a case difference (tertiary)
- '=' : Equal
- Reset: There is a single reset, which is used primarily for contractions and expansions, but which can also be used to add a modification at the end of a set of rules.
'&' : Indicates that the next rule follows the position to where the reset text-argument would be sorted.
This sounds more complicated than it is in practice. For example, the following are equivalent ways of expressing the same thing:
a < b < c
a < b & b < c
a < c & a < b
Notice that the order is important, as the subsequent item goes immediately after the text-argument. The following are not equivalent:
a < b & a < c
a < c & a < b
Either the text-argument must already be present in the sequence, or some initial substring of the text-argument must be present. (e.g. "a < b & ae < e" is valid since "a" is present in the sequence before "ae" is reset). In this latter case, "ae" is not entered and treated as a single character; instead, "e" is sorted as if it were expanded to two characters: "a" followed by an "e". This difference appears in natural languages: in traditional Spanish "ch" is treated as though it contracts to a single character (expressed as "c < ch < d"), while in traditional German "ä" (a-umlaut) is treated as though it expands to two characters (expressed as "a & ae ; ä < b").
Ignorable Characters
For ignorable characters, the first rule must start with a relation (the examples we have used above are really fragments; "a < b" really should be "< a < b"). If, however, the first relation is not "<", then all the text-arguments up to the first "<" are ignorable. For example, ", - < a < b" makes "-" an ignorable character, as we saw earlier in the word "black-birds". In the samples for different languages, you see that most accents are ignorable.
Normalization and Accents
The Collator object automatically normalizes text internally to separate accents from base characters where possible. This is done both when processing the rules, and when comparing two strings. Collator also uses the Unicode canonical mapping to ensure that combining sequences are sorted properly (for more information, see
The Unicode Standard, Version 2.0.)
Errors
The following are errors:
- A text-argument contains unquoted punctuation symbols (e.g. "a < b-c < d").
- A relation or reset character not followed by a text-argument (e.g. "a < , b").
- A reset where the text-argument (or an initial substring of the text-argument) is not already in the sequence. (e.g. "a < b & e < f")
Examples:
Simple: "< a < b < c < d"
Norwegian: "< a,A< b,B< c,C< d,D< e,E< f,F< g,G< h,H< i,I< j,J
< k,K< l,L< m,M< n,N< o,O< p,P< q,Q< r,R< s,S< t,T
< u,U< v,V< w,W< x,X< y,Y< z,Z
< å=a°,Å=A°
;aa,AA< æ,Æ< ø,Ø"
To create a table-based collation object, simply supply the collation rules to the RuleBasedCollator constructor. For example:
Another example:
To add rules on top of an existing table, simply supply the original rules and modifications to the RuleBasedCollator constructor. For example,
Traditional Spanish (fragment): ... & C < ch , cH , Ch , CH ...
German (fragment) : ...< y , Y < z , Z
& AE, Ä & AE, ä
& OE , Ö & OE, ö
& UE , Ü & UE, ü
Symbols (fragment): ...< y, Y < z , Z
& Question-mark ; '?'
& Ampersand ; '&'
& Dollar-sign ; '$'
To create a collation object for traditional Spanish, the user can take the English collation rules and add the additional rules to the table. For example:
In order to sort symbols in an order similar to that of their alphabetic equivalents, you can do the following,
Another way of creating the table-based collation object, mySimple, is:
Or,
Because " < a < b < c < d" is the same as "a < b < d & b < c" or "< a < b & b < c & c < d".
To combine collations from two locales, (without error handling for clarity)
Another more interesting example would be to make changes on an existing table to create a new collation object. For example, add "& C < ch, cH, Ch, CH" to the en_USCollation object to create your own English collation object,
The following example demonstrates how to change the order of non-spacing accents,
UChar contents[] = {
'=', 0x0301, ';', 0x0300, ';', 0x0302,
';', 0x0308, ';', 0x0327, ',', 0x0303,
';', 0x0304, ';', 0x0305, ';', 0x0306,
';', 0x0307, ';', 0x0309, ';', 0x030A,
';', 0x030B, ';', 0x030C, ';', 0x030D,
';', 0x030E, ';', 0x030F, ';', 0x0310,
';', 0x0311, ';', 0x0312,
'<', 'a', ',', 'A', ';', 'a', 'e', ',', 'A', 'E',
';', 0x00e6, ',', 0x00c6, '<', 'b', ',', 'B',
'<', 'c', ',', 'C', '<', 'e', ',', 'E', '&',
'C', '<', 'd', ',', 'D', 0 };
UnicodeString oldRules(contents);
UErrorCode status = U_ZERO_ERROR;
UChar addOn[] = { '&', ',', 0x0300, ';', 0x0308, ';', 0x0302, 0 };
oldRules += addOn;
RuleBasedCollator *myCollation = new RuleBasedCollator(oldRules, status);
The last example shows how to put new primary ordering in before the default setting. For example, in Japanese collation, you can either sort English characters before or after Japanese characters,
UErrorCode status = U_ZERO_ERROR;
RuleBasedCollator* en_USCollation =
(RuleBasedCollator*) Collator::createInstance(Locale::US, status);
if (U_FAILURE(status)) return;
UChar jaString[] = { '&', 0x2212, '<', 0x3041, ',', 0x3042, '<', 0x3043, ',', 0x3044, 0 };
UnicodeString rules( en_USCollation->getRules() );
rules += jaString;
RuleBasedCollator *myJapaneseCollation = new RuleBasedCollator(rules, status);
NOTE: Typically, a collation object is created with Collator::createInstance().
Note: RuleBasedCollators with different Locale, CollationStrength and Decomposition mode settings will return different sort orders for the same set of strings. Locales have specific collation rules, and the way in which secondary and tertiary differences are taken into account, for example, will result in a different sorting order for the same strings.
-
See also:
-
Collator
-
Version:
-
1.27 4/8/97
-
Author(s):
-
Helena Shih
Definition at line 351 of file tblcoll.h.
Constructor & Destructor Documentation
|
RuleBasedCollator constructor.
This takes the table rules and builds a collation table out of them. Please see RuleBasedCollator class description for more details on the collation rule syntax. -
See also:
-
Locale
-
Parameters:
-
rules
|
the collation rules to build the collation table from. |
-
Stable:
-
|
RuleBasedCollator::~RuleBasedCollator (
|
) [virtual]
|
|
RuleBasedCollator::RuleBasedCollator (
|
const RuleBasedCollator & other )
|
|
|
Copy constructor.
-
Stable:
-
|
RuleBasedCollator::RuleBasedCollator (
|
) [private]
|
|
RuleBasedCollator::RuleBasedCollator (
|
const Locale & desiredLocale,
|
|
UErrorCode & status ) [private]
|
|
|
RuleBasedCollator constructor.
This constructor takes a locale. The only caller of this class should be Collator::createInstance(). If createInstance() happens to know that the requested locale's collation is implemented as a RuleBasedCollator, it can then call this constructor. OTHERWISE IT SHOULDN'T, since this constructor ALWAYS RETURNS A VALID COLLATION TABLE. It does this by falling back to defaults. |
Member Function Documentation
void RuleBasedCollator::addComposedChars (
|
void ) [private]
|
|
|
Add expanding entries for pre-composed unicode characters so that this collator can be used reasonably well with decomposition turned off.
|
|
Adds the contracting string into the collation table, for example, ch in Spanish.
-
Parameters:
-
groupChars
|
the contracting characters. |
anOrder
|
the order. |
fwd
|
TRUE if this is for the forward direction |
status
|
the error code status. |
|
|
Adds the contracting string into the collation table, for example, ch in Spanish.
-
Parameters:
-
groupChars
|
the contracting characters. |
anOrder
|
the order. |
status
|
the error code status. |
Definition at line 1027 of file tblcoll.h. |
|
Adds the expanding string into the collation table, for example, a-umlaut in German.
-
Parameters:
-
groupChars
|
the contracting characters. |
expChars
|
the expanding characters. |
anOrder
|
the order. |
status
|
the error code status. |
|
|
Create a new entry in the expansion table that contains the orderings for the given characters.
If anOrder is valid, it is added to the beginning of the expanded list of orders. |
|
Adds a character and its designated order into the collation table.
-
Parameters:
-
ch
|
the Unicode character, |
anOrder
|
the order. |
status
|
the error code status. |
|
void RuleBasedCollator::addToCache (
|
const UnicodeString & key ) [private]
|
|
|
Add this table collation to the cache.
This involves adding the enclosed TableCollationData to the cache, and then marking our pointer as "not owned" by setting dataIsOwned to false. -
Parameters:
-
key
|
the unique key that represents this collation data object. |
|
void RuleBasedCollator::chopLocale (
|
UnicodeString & localeName ) [static, private]
|
|
|
Chops off the last portion of the locale name.
For example, from "en_US_CA" to "en_US" and "en_US" to "en". -
Parameters:
-
localeName
|
the locale name. |
|
Collator * RuleBasedCollator::clone (
|
void ) const [virtual]
|
|
|
Makes a deep copy of the object.
The caller owns the returned object. -
Returns:
-
the cloned object.
-
Stable:
-
Reimplemented from Collator. |
|
Returns the binary format of the class's rules.
The format is that of .col files. -
Parameters:
-
length
|
Returns the length of the data, in bytes |
status
|
the error code status. |
-
Returns:
-
memory, owned by the caller, of size 'length' bytes.
-
Draft:
-
INTERNAL USE ONLY
|
void RuleBasedCollator::commit (
|
void ) [private]
|
|
|
Look up for unmapped values in the expanded character table.
|
|
String compare that uses user supplied character iteration.
The idea is to prevent users from having to convert the whole string into UChar's before comparing since sometimes strings differ on first couple of characters. -
Parameters:
-
coll
|
collator to be used for comparing |
source
|
pointer to function for iterating over the first string |
target
|
pointer to function for iterating over the second string |
-
Returns:
-
The result of comparing the strings; one of UCOL_EQUAL, UCOL_GREATER, UCOL_LESS
-
Draft:
-
API 1.7 freeze
Reimplemented from Collator. |
|
The comparison function compares the character data stored in two different string arrays.
Returns information about whether a string array is less than, greater than or equal to another string array.
Example of use:
. UErrorCode status = U_ZERO_ERROR;
. Collator *myCollation = Collator::createInstance(Locale::US, status);
. if (U_FAILURE(status)) return;
. myCollation->setStrength(Collator::PRIMARY);
. // result would be Collator::EQUAL ("abc" == "ABC")
. // (no primary difference between "abc" and "ABC")
. Collator::EComparisonResult result = myCollation->compare(L"abc", 3, L"ABC", 3);
. myCollation->setStrength(Collator::TERTIARY);
. // result would be Collator::LESS ("abc" <<< "ABC")
. // (with tertiary difference between "abc" and "ABC")
. Collator::EComparisonResult result = myCollation->compare(L"abc", 3, L"ABC", 3);
-
Parameters:
-
source
|
the source string array to be compared with. |
sourceLength
|
the length of the source string array. If this value is equal to -1, the string array is null-terminated. |
target
|
the string that is to be compared with the source string. |
targetLength
|
the length of the target string array. If this value is equal to -1, the string array is null-terminated. |
-
Returns:
-
Returns a byte value. GREATER if source is greater than target; EQUAL if source is equal to target; LESS if source is less than target
-
Stable:
-
Reimplemented from Collator. |
|
Compares a range of character data stored in two different strings based on the collation rules up to the specified length.
Returns information about whether a string is less than, greater than or equal to another string in a language. This can be overridden in a subclass. -
Parameters:
-
source
|
the source string. |
target
|
the target string to be compared with the source string. |
length
|
compares up to the specified length |
-
Returns:
-
the comparison result. GREATER if the source string is greater than the target string, LESS if the source is less than the target. Otherwise, returns EQUAL.
-
Draft:
-
Reimplemented from Collator. |
|
Compares a range of character data stored in two different strings based on the collation rules.
Returns information about whether a string is less than, greater than or equal to another string in a language. This can be overridden in a subclass. -
Parameters:
-
source
|
the source string. |
target
|
the target string to be compared with the source string. |
-
Returns:
-
the comparison result. GREATER if the source string is greater than the target string, LESS if the source is less than the target. Otherwise, returns EQUAL.
-
Stable:
-
Reimplemented from Collator. |
const char* RuleBasedCollator::constructFromBundle (
|
const Locale & fileName,
|
|
UErrorCode & status ) [private]
|
|
void RuleBasedCollator::constructFromFile (
|
const char * fileName,
|
|
UErrorCode & status ) [private]
|
|
|
Internal constructFromXyz() methods.
These methods do object construction from various sources. They act like assignment operators; whatever used to be in this object is discarded.
FROM RULES. This constructor turns around and calls build().
FROM CACHE. This constructor tries to get the requested cached TableCollationData object, and wrap us around it.
FROM FILE. There are two constructors named constructFromFile(). One takes a const char*: this is a path name to be passed directly to the host OS, where a flattened table collation (produced by writeToFile()) resides. The other method takes a Locale, and a UnicodeString locale file name. The distinction is this: the Locale is the locale we are seeking. The file name is the name of the data file (either binary, as produced by writeToFile(), or ASCII, as read by ResourceBundle). Within the file, if it is found, the method will look for the given Locale. |
|
Creates a collation element iterator for the source.
The caller of this method is responsible for the memory management of the returned pointer. -
Parameters:
-
source
|
the CharacterIterator which produces the characters over which the CollationElementIterator will iterate. |
-
Returns:
-
the collation element iterator of the source using this as the based collator.
-
Stable:
-
|
|
Creates a collation element iterator for the source string.
The caller of this method is responsible for the memory management of the return pointer. -
Parameters:
-
-
Returns:
-
the collation element iterator of the source string using this as the based collator.
-
Stable:
-
|
|
Creates the path name with given information.
-
Parameters:
-
prefix
|
the prefix of the file name. |
name
|
the actual file name. |
suffix
|
the suffix of the file name. |
-
Returns:
-
the generated file name.
|
|
Universal attribute getter.
-
Parameters:
-
attr
|
attribute type |
status
|
to indicate whether the operation went on smoothly or there were errors |
-
Returns:
-
attribute value
-
Draft:
-
API 1.7 freeze
Reimplemented from Collator. |
int32_t RuleBasedCollator::getCharOrder (
|
UChar ch ) const [private]
|
|
|
Gets the comparison order of a character from the collation table.
-
Parameters:
-
-
Returns:
-
the comparison order of a character.
|
|
Transforms a specified region of the string into a series of characters that can be compared with CollationKey.compare.
Use a CollationKey when you need to do repeated comparisons on the same string. For a single comparison the compare method will be faster. -
Parameters:
-
source
|
the source string. |
key
|
the transformed key of the source string. |
status
|
the error code status. |
-
Returns:
-
the transformed key.
-
See also:
-
CollationKey
-
Draft:
-
Reimplemented from Collator. |
|
Transforms a specified region of the string into a series of characters that can be compared with CollationKey.compare.
Use a CollationKey when you need to do repeated comparisons on the same string. For a single comparison the compare method will be faster. -
Parameters:
-
source
|
the source string. |
key
|
the transformed key of the source string. |
status
|
the error code status. |
-
Returns:
-
the transformed key.
-
See also:
-
CollationKey
-
Draft:
-
Reimplemented from Collator. |
|
If the given string has been specified as a contracting string in this collation table, return its ordering, otherwise return UNMAPPED.
-
Parameters:
-
-
Returns:
-
the order of the contracted character, or UNMAPPED if there isn't one.
|
VectorOfPToContractElement * RuleBasedCollator::getContractValues (
|
int32_t index ) const [private]
|
|
|
Gets the entry of list of the contracting string in the collation table.
-
Parameters:
-
index
|
the index of the contract character list |
-
Returns:
-
the entry of the contracting element of the specified index in the list.
|
VectorOfPToContractElement * RuleBasedCollator::getContractValues (
|
UChar ch ) const [private]
|
|
|
Gets the entry of list of the contracting string in the collation table.
-
Parameters:
-
ch
|
the starting character of the contracting string |
-
Returns:
-
the entry of contracting element which starts with the specified character in the list of contracting elements.
|
UClassID RuleBasedCollator::getDynamicClassID (
|
void ) const [inline, virtual]
|
|
|
Returns a unique class ID POLYMORPHICALLY.
Pure virtual override. This method is to implement a simple version of RTTI, since not all C++ compilers support genuine RTTI. Polymorphic operator==() and clone() methods call this method.
-
Returns:
-
The class ID for this object. All objects of a given class have the same class ID. Objects of other classes have different class IDs.
Reimplemented from Collator.
Definition at line 587 of file tblcoll.h. |
int32_t RuleBasedCollator::getEntry (
|
VectorOfPToContractElement * list,
|
|
const UnicodeString & name,
|
|
UBool fwd ) [static, private]
|
|
|
Gets the comparison order of a character from the collation table.
-
Parameters:
-
list
|
the contracting element table. |
name
|
the contracting char string. |
-
Returns:
-
the comparison order of the contracting character.
|
VectorOfInt * RuleBasedCollator::getExpandValueList (
|
int32_t order ) const [private]
|
|
|
Gets the entry of value list of the expanding string in the collation table at the specified index.
-
Parameters:
-
order
|
the order of the expanding string value list |
-
Returns:
-
the entry of the expanding-char element of the specified index in the list.
|
|
Returns current rules.
Delta defines whether full rules are returned or just the tailoring. -
Parameters:
-
delta
|
one of UCOL_TAILORING_ONLY, UCOL_FULL_RULES. |
-
Returns:
-
UnicodeString with rules
|
|
Gets the table-based rules for the collation object.
-
Returns:
-
returns the collation rules that the table collation object was created from.
-
Stable:
-
|
void* RuleBasedCollator::getSomeMemory (
|
int32_t size ) [private]
|
|
|
Get the sort key as an array of bytes from an UChar buffer.
-
Parameters:
-
source
|
string to be processed. |
sourceLength
|
length of string to be processed. If -1, the string is 0 terminated and length will be decided by the function. |
result
|
buffer to store result in. If NULL, number of bytes needed will be returned. |
resultLength
|
length of the result buffer. If it is not large enough, the buffer will be filled to capacity. |
-
Returns:
-
Number of bytes needed for storing the sort key
-
Draft:
-
API 1.7 freeze
Reimplemented from Collator. |
|
Get the sort key as an array of bytes from an UnicodeString.
-
Parameters:
-
source
|
string to be processed. |
result
|
buffer to store result in. If NULL, number of bytes needed will be returned. |
resultLength
|
length of the result buffer. If it is not large enough, the buffer will be filled to capacity. |
-
Returns:
-
Number of bytes needed for storing the sort key
-
Draft:
-
API 1.7 freeze
Reimplemented from Collator. |
UClassID RuleBasedCollator::getStaticClassID (
|
void ) [inline, static]
|
|
|
Returns the class ID for this class.
This is useful only for comparing to a return value from getDynamicClassID(). For example:
Base* polymorphic_pointer = createPolymorphicObject(); if (polymorphic_pointer->getDynamicClassID() == Derived::getStaticClassID()) ...
-
Returns:
-
The class ID for all objects of this class.
Definition at line 601 of file tblcoll.h.
Referenced by getDynamicClassID(). |
int32_t RuleBasedCollator::getStrengthOrder (
|
NormalizerIterator * cursor,
|
|
UErrorCode status ) const [private]
|
|
int32_t RuleBasedCollator::hashCode (
|
void ) const [virtual]
|
|
|
Generates the hash code for the rule-based collation object.
-
Returns:
-
the hash code.
-
Stable:
-
Reimplemented from Collator. |
|
Increment of the last order based on the collation strength.
-
Parameters:
-
s
|
the collation strength. |
lastOrder
|
the last collation order. |
-
Returns:
-
the new collation order.
|
UBool RuleBasedCollator::initMutex (
|
void ) [static, private]
|
|
VectorOfInt* RuleBasedCollator::makeReorderedBuffer (
|
NormalizerIterator * cursor,
|
|
UChar colFirst,
|
|
int32_t lastValue,
|
|
VectorOfInt * lastExpansion ) const [private]
|
|
int32_t RuleBasedCollator::nextContractChar (
|
NormalizerIterator * cursor,
|
|
UChar ch,
|
|
UErrorCode & status ) const [private]
|
|
UBool RuleBasedCollator::operator!= (
|
const Collator & other ) const [inline, virtual]
|
|
|
Returns true if "other" is not the same as "this".
-
Stable:
-
Reimplemented from Collator.
Definition at line 1021 of file tblcoll.h. |
RuleBasedCollator & RuleBasedCollator::operator= (
|
const RuleBasedCollator & other )
|
|
|
Assignment operator.
-
Stable:
-
|
UBool RuleBasedCollator::operator== (
|
const Collator & other ) const [virtual]
|
|
|
Returns true if "other" is the same as "this".
-
Stable:
-
Reimplemented from Collator. |
Collator * RuleBasedCollator::safeClone (
|
void ) [virtual]
|
|
|
Thread safe cloning operation.
-
Returns:
-
pointer to the new clone; the caller is responsible for deleting it.
-
Draft:
-
API 1.7 freeze
Reimplemented from Collator. |
|
Universal attribute setter.
-
Parameters:
-
attr
|
attribute type |
value
|
attribute value |
status
|
indicates whether the operation succeeded or an error occurred |
-
Draft:
-
API 1.7 freeze
Reimplemented from Collator. |
int32_t RuleBasedCollator::strengthOrder (
|
int32_t value ) const [private]
|
|
UBool RuleBasedCollator::writeToFile (
|
const char * fileName ) const [private]
|
|
|
Flattens the given object persistently to a file.
The file name argument should be a path name that can be passed directly to the underlying OS. Once a RuleBasedCollator has been written to a file, it can be resurrected by calling the RuleBasedCollator(const char*) constructor, which operates very quickly. -
Parameters:
-
fileName
|
the output file name. |
-
Returns:
-
TRUE if writing to the file was successful, FALSE otherwise.
|
Friends And Related Function Documentation
class CollationElementIterator [friend]
|
|
class RuleBasedCollatorStreamer [friend]
|
|
|
Compare two strings.
The strings will be compared using the normalization mode and options specified in ucol_open or ucol_openRules. -
Parameters:
-
coll
|
The UCollator containing the comparison rules. |
source
|
The source string. |
sourceLength
|
The length of source, or -1 if null-terminated. |
target
|
The target string. |
targetLength
|
The length of target, or -1 if null-terminated. |
-
Returns:
-
The result of comparing the strings; one of UCOL_EQUAL, UCOL_GREATER, UCOL_LESS
-
See also:
-
ucol_greater , ucol_greaterOrEqual , ucol_equal
-
Stable:
-
|
Member Data Documentation
const int32_t RuleBasedCollator::CHARINDEX [static, private]
|
|
const int32_t RuleBasedCollator::COLELEMENTSTART [static, private]
|
|
const int32_t RuleBasedCollator::CONTRACTCHARINDEX [static, private]
|
|
const int32_t RuleBasedCollator::CONTRACTCHAROVERFLOW [static, private]
|
|
const int32_t RuleBasedCollator::EXPANDCHARINDEX [static, private]
|
|
const int16_t RuleBasedCollator::FILEID [static, private]
|
|
const int32_t RuleBasedCollator::IGNORABLE [static, private]
|
|
const int32_t RuleBasedCollator::IGNORABLEMASK [static, private]
|
|
const int32_t RuleBasedCollator::MAXIGNORABLE [static, private]
|
|
const int32_t RuleBasedCollator::PRIMARYDIFFERENCEONLY [static, private]
|
|
const int32_t RuleBasedCollator::PRIMARYLOWZEROMASK [static, private]
|
|
const int32_t RuleBasedCollator::PRIMARYORDERINCREMENT [static, private]
|
|
const int32_t RuleBasedCollator::PRIMARYORDERMASK [static, private]
|
|
const int32_t RuleBasedCollator::PRIMARYORDERSHIFT [static, private]
|
|
const int32_t RuleBasedCollator::PRIMIGNORABLE [static, private]
|
|
const int32_t RuleBasedCollator::RESETSECONDARYTERTIARY [static, private]
|
|
const int32_t RuleBasedCollator::RESETTERTIARY [static, private]
|
|
const int32_t RuleBasedCollator::SECIGNORABLE [static, private]
|
|
const int32_t RuleBasedCollator::SECONDARYDIFFERENCEONLY [static, private]
|
|
const int32_t RuleBasedCollator::SECONDARYORDERINCREMENT [static, private]
|
|
const int32_t RuleBasedCollator::SECONDARYORDERMASK [static, private]
|
|
const int32_t RuleBasedCollator::SECONDARYORDERSHIFT [static, private]
|
|
const int32_t RuleBasedCollator::SECONDARYRESETMASK [static, private]
|
|
const int32_t RuleBasedCollator::SORTKEYOFFSET [static, private]
|
|
const int32_t RuleBasedCollator::TERIGNORABLE [static, private]
|
|
const int32_t RuleBasedCollator::TERTIARYORDERINCREMENT [static, private]
|
|
const int32_t RuleBasedCollator::TERTIARYORDERMASK [static, private]
|
|
const int32_t RuleBasedCollator::UNMAPPED [static, private]
|
|
UChar RuleBasedCollator::cacheKey [static, private]
|
|
UMTX RuleBasedCollator::collMutex [static, private]
|
|
NormalizerIterator * RuleBasedCollator::cursor1 [private]
|
|
NormalizerIterator * RuleBasedCollator::cursor2 [private]
|
|
TableCollationData * RuleBasedCollator::data [private]
|
|
UBool RuleBasedCollator::dataIsOwned [private]
|
|
int32_t RuleBasedCollator::fAvailableMemory [private]
|
|
int32_t * RuleBasedCollator::fSizes [private]
|
|
void ** RuleBasedCollator::fSomeMemory [private]
|
|
int32_t RuleBasedCollator::fUsedMemory [private]
|
|
char RuleBasedCollator::fgClassID [static, private]
|
|
UBool RuleBasedCollator::isMutexInited [static, private]
|
|
UBool RuleBasedCollator::isOverIgnore [private]
|
|
const char * RuleBasedCollator::kFilenameSuffix [static, private]
|
|
UChar RuleBasedCollator::lastChar [private]
|
|
MergeCollation * RuleBasedCollator::mPattern [private]
|
|
The documentation for this class was generated from the following file:
Generated at Fri Dec 15 12:13:43 2000 for ICU 1.7 by
doxygen 1.2.3 written by Dimitri van Heesch,
© 1997-2000