#include "unicode/utypes.h"
#include "unicode/unorm.h"
Go to the source code of this file.
Defines | |
#define | UCOL_PRIMARYMASK 0xFFFF0000 |
#define | UCOL_SECONDARYMASK 0x0000FF00 |
#define | UCOL_TERTIARYMASK 0x000000FF |
#define | UCOL_NULLORDER 0xFFFFFFFF |
This indicates the last element in a UCollationElements has been consumed. More... | |
Typedefs | |
typedef struct collIterate | collIterate |
typedef struct incrementalContext | incrementalContext |
typedef void* | UCollator |
A collator. More... | |
typedef UColAttributeValue | UCollationStrength |
Possible collation strengths - all under UColAttributeValue. More... | |
typedef struct UCollationElements | UCollationElements |
typedef UChar | UCharForwardIterator (void *context) |
Enumerations | |
enum | UCollationResult { UCOL_EQUAL = 0, UCOL_GREATER = 1, UCOL_LESS = -1 } |
Possible values for a comparison result. More... | |
enum | UColAttributeValue { UCOL_DEFAULT = -1, UCOL_PRIMARY = 0, UCOL_SECONDARY = 1, UCOL_TERTIARY = 2, UCOL_DEFAULT_STRENGTH = UCOL_TERTIARY, UCOL_QUATERNARY = 3, UCOL_IDENTICAL = 15, UCOL_OFF = 16, UCOL_ON = 17, UCOL_SHIFTED = 20, UCOL_NON_IGNORABLE = 21, UCOL_LOWER_FIRST = 24, UCOL_UPPER_FIRST = 25, UCOL_ON_WITHOUT_HANGUL = 28, UCOL_ATTRIBUTE_VALUE_COUNT } |
enum | UColAttribute { UCOL_FRENCH_COLLATION, UCOL_ALTERNATE_HANDLING, UCOL_CASE_FIRST, UCOL_CASE_LEVEL, UCOL_NORMALIZATION_MODE, UCOL_STRENGTH, UCOL_ATTRIBUTE_COUNT } |
enum | UColRuleOption { UCOL_TAILORING_ONLY, UCOL_FULL_RULES } |
Functions | |
U_CAPI UCollator* | ucol_open ( const char *loc, UErrorCode *status) |
Open a UCollator for comparing strings. More... | |
U_CAPI UCollator* | ucol_openRules ( const UChar *rules, int32_t rulesLength, UNormalizationMode mode, UCollationStrength strength, UErrorCode *status) |
Open a UCollator for comparing strings. More... | |
U_CAPI void | ucol_close (UCollator *coll) |
Close a UCollator. More... | |
U_CAPI UCollationResult | ucol_strcoll ( const UCollator *coll, const UChar *source, int32_t sourceLength, const UChar *target, int32_t targetLength) |
Compare two strings. More... | |
U_CAPI UCollationResult | ucol_strcollEx ( const UCollator *coll, const UChar *source, int32_t sourceLength, const UChar *target, int32_t targetLength) |
DO NOT USE THIS API!!! It is the old implementation of ucol_strcoll and is used only for testing purposes. More... | |
U_CAPI UBool | ucol_greater ( const UCollator *coll, const UChar *source, int32_t sourceLength, const UChar *target, int32_t targetLength) |
Determine if one string is greater than another. More... | |
U_CAPI UBool | ucol_greaterOrEqual ( const UCollator *coll, const UChar *source, int32_t sourceLength, const UChar *target, int32_t targetLength) |
Determine if one string is greater than or equal to another. More... | |
U_CAPI UBool | ucol_equal ( const UCollator *coll, const UChar *source, int32_t sourceLength, const UChar *target, int32_t targetLength) |
Compare two strings for equality. More... | |
U_CAPI UCollationStrength | ucol_getStrength (const UCollator *coll) |
Get the collation strength used in a UCollator. More... | |
U_CAPI void | ucol_setStrength ( UCollator *coll, UCollationStrength strength) |
Set the collation strength used in a UCollator. More... | |
U_CAPI UNormalizationMode | ucol_getNormalization (const UCollator* coll) |
Get the normalization mode used in a UCollator. More... | |
U_CAPI void | ucol_setNormalization ( UCollator *coll, UNormalizationMode mode) |
Set the normalization mode used in a UCollator. More... | |
U_CAPI int32_t | ucol_getDisplayName ( const char *objLoc, const char *dispLoc, UChar *result, int32_t resultLength, UErrorCode *status) |
Get the display name for a UCollator. More... | |
U_CAPI const char* | ucol_getAvailable (int32_t index) |
Get a locale for which collation rules are available. More... | |
U_CAPI int32_t | ucol_countAvailable (void) |
Determine how many locales have collation rules available. More... | |
U_CAPI const UChar* | ucol_getRules ( const UCollator *coll, int32_t *length) |
Get the collation rules from a UCollator. More... | |
U_CAPI int32_t | ucol_getSortKey (const UCollator *coll, const UChar *source, int32_t sourceLength, uint8_t *result, int32_t resultLength) |
Get a sort key for a string from a UCollator. More... | |
U_CAPI int32_t | ucol_getSortKeyEx (const UCollator *coll, const UChar *source, int32_t sourceLength, uint8_t *result, int32_t resultLength) |
DO NOT USE THIS API!!! It is the old implementation of ucol_getSortKey and is used only for testing purposes. More... | |
U_CAPI int32_t | ucol_keyHashCode ( const uint8_t* key, int32_t length) |
Generate a hash code for a collation key. More... | |
U_CAPI UCollationElements* | ucol_openElements ( const UCollator *coll, const UChar *text, int32_t textLength, UErrorCode *status) |
Open the collation elements for a string. More... | |
U_CAPI void | ucol_closeElements (UCollationElements *elems) |
Close a UCollationElements. More... | |
U_CAPI void | ucol_reset (UCollationElements *elems) |
Reset the collation elements to their initial state. More... | |
U_CAPI int32_t | ucol_next ( UCollationElements *elems, UErrorCode *status) |
Get the ordering priority of the next collation element in the text. More... | |
U_CAPI int32_t | ucol_previous ( UCollationElements *elems, UErrorCode *status) |
Get the ordering priority of the previous collation element in the text. More... | |
U_CAPI int32_t | ucol_getMaxExpansion ( const UCollationElements *elems, int32_t order) |
Get the maximum length of any expansion sequences that end with the specified comparison order. More... | |
U_CAPI void | ucol_setText ( UCollationElements *elems, const UChar *text, int32_t textLength, UErrorCode *status) |
Set the text containing the collation elements. More... | |
U_CAPI UTextOffset | ucol_getOffset (const UCollationElements *elems) |
Get the offset of the current source character. More... | |
U_CAPI void | ucol_setOffset ( UCollationElements *elems, UTextOffset offset, UErrorCode *status) |
Set the offset of the current source character. More... | |
U_CAPI void U_EXPORT2 | ucol_getVersion (const UCollator* coll, UVersionInfo info) |
Gets the version information for a Collator. More... | |
U_CAPI void | ucol_setAttribute (UCollator *coll, UColAttribute attr, UColAttributeValue value, UErrorCode *status) |
Universal attribute setter. More... | |
U_CAPI UColAttributeValue | ucol_getAttribute (const UCollator *coll, UColAttribute attr, UErrorCode *status) |
Universal attribute getter. More... | |
U_CAPI UCollator* | ucol_safeClone (const UCollator *coll, void *stackBuffer, uint32_t bufferSize, UErrorCode *status) |
Thread safe cloning operation. More... | |
U_CAPI UCollationResult | ucol_strcollinc (const UCollator *coll, UCharForwardIterator *source, void *sourceContext, UCharForwardIterator *target, void *targetContext) |
String compare that uses user supplied character iteration. More... | |
U_CAPI int32_t | ucol_getRulesEx (const UCollator *coll, UColRuleOption delta, UChar *buffer, int32_t bufferLen) |
Returns current rules. More... |
The C API for Collator performs locale-sensitive string comparison. You use this class to build searching and sorting routines for natural language text. Important: The ICU collation implementation is being reworked. This means that collation results and especially sort keys will change from ICU 1.6 to 1.7 and again to 1.8. For details, see the collation design document.
Like other locale-sensitive classes, you can use the function ucol_open()
, to obtain the appropriate pointer to UCollator
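object for a given locale. You only need to look further, for example at the rules returned by ucol_getRules() or at ucol_openRules(), if you need to understand the details of a particular collation strategy or if you need to modify that strategy.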
The following example shows how to compare two strings using the UCollator
for the default locale.
// Compare two strings in the default locale
UErrorCode success = U_ZERO_ERROR;
UCollator* myCollator = ucol_open(NULL, &success);
UChar source[4], target[4];
u_uastrcpy(source, "abc");
u_uastrcpy(target, "ABC");
if( ucol_strcoll(myCollator, source, u_strlen(source), target, u_strlen(target)) == UCOL_LESS) {
    printf("abc is less than ABC\n");
}else{
    printf("abc is greater than or equal to ABC\n");
}
You can set a Collator
's strength property to determine the level of difference considered significant in comparisons. Four strengths are provided: UCOL_PRIMARY
, UCOL_SECONDARY
, UCOL_TERTIARY
, and UCOL_IDENTICAL
. The exact assignment of strengths to language features is locale dependent. For example, in Czech, "e" and "f" are considered primary differences, while "e" and "\u00EA" are secondary differences, "e" and "E" are tertiary differences and "e" and "e" are identical. The following shows how both case and accents could be ignored for US English.
//Get the Collator for US English and set its strength to UCOL_PRIMARY
UErrorCode success = U_ZERO_ERROR;
UCollator* usCollator = ucol_open("en_US", &success);
ucol_setStrength(usCollator, UCOL_PRIMARY);
UChar source[4], target[4];
u_uastrcpy(source, "abc");
u_uastrcpy(target, "ABC");
if( ucol_strcoll(usCollator, source, u_strlen(source), target, u_strlen(target)) == UCOL_EQUAL) {
    printf("'abc' and 'ABC' strings are equivalent with strength UCOL_PRIMARY\n");
}
For comparing strings exactly once, the ucol_strcoll
method provides the best performance. When sorting a list of strings however, it is generally necessary to compare each string multiple times. In this case, sort keys provide better performance. The ucol_getSortKey
method converts a string to a series of bytes that can be compared bitwise against other sort keys using strcmp()
. Sort keys are written as zero-terminated byte strings. They consist of several substrings, one for each collation strength level, that are delimited by 0x01 bytes. If the string code points are appended for UCOL_IDENTICAL, then they are processed for correct code point order comparison and may contain 0x01 bytes but not zero bytes.
Note: UCollator
s with different Locale, Collation Strength and Decomposition Mode settings will return different sort orders for the same set of strings. Locales have specific collation rules, and the way in which secondary and tertiary differences are taken into account, for example, will result in a different sorting order for the same strings.
Definition in file ucol.h.
|
This indicates the last element in a UCollationElements has been consumed.
|
|
|
|
|
|
|
|
|
|
|
|
Possible collation strengths - all under UColAttributeValue.
|
|
A collator. For usage in C programs. |
|
|
|
|
|
|
|
|
|
|
Possible values for a comparison result.
|
|
Close a UCollator. Once closed, a UCollator should not be used.
|
|
Close a UCollationElements. Once closed, a UCollationElements may no longer be used.
|
|
Determine how many locales have collation rules available. This function is most useful for determining the loop ending condition for calls to ucol_getAvailable().
|
|
Compare two strings for equality. This function is equivalent to ucol_strcoll() == UCOL_EQUAL.
|
|
Universal attribute getter.
|
|
Get a locale for which collation rules are available. A UCollator in a locale returned by this function will perform the correct collation for the locale.
|
|
Get the display name for a UCollator. The display name is suitable for presentation to a user.
|
|
Get the maximum length of any expansion sequences that end with the specified comparison order. This is useful for .... ?
|
|
Get the normalization mode used in a UCollator. The normalization mode influences how strings are compared.
|
|
Get the offset of the current source character. This is an offset into the text of the character containing the current collation elements.
|
|
Get the collation rules from a UCollator. The rules will follow the rule syntax.
|
|
Returns current rules. Delta defines whether full rules are returned or just the tailoring. Returns number of UChars needed to store rules. If buffer is NULL or bufferLen is not large enough to hold the rules, only as many UChars as fit in the available space are stored.
|
|
Get a sort key for a string from a UCollator.
Sort keys may be compared using strcmp().
|
|
DO NOT USE THIS API!!! It is the old implementation of ucol_getSortKey and is used only for testing purposes.
For internal use only. |
|
Get the collation strength used in a UCollator. The strength influences how strings are compared.
|
|
Gets the version information for a Collator.
|
|
Determine if one string is greater than another. This function is equivalent to ucol_strcoll() == UCOL_GREATER.
|
|
Determine if one string is greater than or equal to another. This function is equivalent to ucol_strcoll() != UCOL_LESS.
|
|
Generate a hash code for a collation key. A hash code is a 32-bit value suitable for use as a key in a hashtable.
|
|
Get the ordering priority of the next collation element in the text. A single character may contain more than one collation element.
|
|
Open a UCollator for comparing strings. The UCollator may be used in calls to ucol_strcoll().
|
|
Open the collation elements for a string.
|
|
Open a UCollator for comparing strings. The UCollator may be used in calls to ucol_strcoll().
|
|
Get the ordering priority of the previous collation element in the text. A single character may contain more than one collation element.
|
|
Reset the collation elements to their initial state. This will move the 'cursor' to the beginning of the text.
|
|
Thread safe cloning operation.
|
|
Universal attribute setter.
|
|
Set the normalization mode used in a UCollator. The normalization mode influences how strings are compared.
|
|
Set the offset of the current source character. This is an offset into the text of the character to be processed.
|
|
Set the collation strength used in a UCollator. The strength influences how strings are compared. Example of use:
UCollationResult result;
UChar source[4], target[4];
UErrorCode status = U_ZERO_ERROR;
UCollator *myCollation = ucol_open("en_US", &status);
if (U_FAILURE(status)) return;
ucol_setStrength(myCollation, UCOL_PRIMARY);
u_uastrcpy(source, "abc");
u_uastrcpy(target, "ABC");
// result will be "abc" == "ABC"
// tertiary differences will be ignored
result = ucol_strcoll(myCollation, source, u_strlen(source), target, u_strlen(target));
|
|
Set the text containing the collation elements.
|
|
Compare two strings. The strings will be compared using the normalization mode and options specified in ucol_open() or ucol_openRules().
|
|
DO NOT USE THIS API!!! It is the old implementation of ucol_strcoll and is used only for testing purposes.
For internal use only. |
|
String compare that uses user supplied character iteration. The idea is to spare users from having to convert whole strings into UChars before comparing, since sometimes strings differ within the first couple of characters.
|