#include "unicode/utypes.h"
#include "unicode/unorm.h"
#include "unicode/ucoleitr.h"
Go to the source code of this file.
Defines | |
#define | U_COL_SAFECLONE_BUFFERSIZE 256 |
Typedefs | |
typedef struct collIterate | collIterate |
typedef struct incrementalContext | incrementalContext |
typedef struct UCollator | UCollator |
typedef UColAttributeValue | UCollationStrength |
Possible collation strengths - all under UColAttributeValue. More... | |
Enumerations | |
enum | UCollationResult { UCOL_EQUAL = 0, UCOL_GREATER = 1, UCOL_LESS = -1 } |
Possible values for a comparison result. More... | |
enum | UColAttributeValue { UCOL_DEFAULT = -1, UCOL_PRIMARY = 0, UCOL_SECONDARY = 1, UCOL_TERTIARY = 2, UCOL_DEFAULT_STRENGTH = UCOL_TERTIARY, UCOL_CE_STRENGTH_LIMIT, UCOL_QUATERNARY = 3, UCOL_IDENTICAL = 15, UCOL_STRENGTH_LIMIT, UCOL_OFF = 16, UCOL_ON = 17, UCOL_SHIFTED = 20, UCOL_NON_IGNORABLE = 21, UCOL_LOWER_FIRST = 24, UCOL_UPPER_FIRST = 25, UCOL_ON_WITHOUT_HANGUL = 28, UCOL_ATTRIBUTE_VALUE_COUNT } |
enum | UColAttribute { UCOL_FRENCH_COLLATION, UCOL_ALTERNATE_HANDLING, UCOL_CASE_FIRST, UCOL_CASE_LEVEL, UCOL_NORMALIZATION_MODE, UCOL_STRENGTH, UCOL_ATTRIBUTE_COUNT } |
enum | UColRuleOption { UCOL_TAILORING_ONLY, UCOL_FULL_RULES } |
Functions | |
U_CAPI UCollator* | ucol_open ( const char *loc, UErrorCode *status) |
Open a UCollator for comparing strings. More... | |
U_CAPI UCollator* U_EXPORT2 | ucol_openVersion (const char *loc, UVersionInfo version, UErrorCode *status) |
Open a UCollator with a specific version. More... | |
U_CAPI UCollator* | ucol_openRules ( const UChar *rules, int32_t rulesLength, UNormalizationMode mode, UCollationStrength strength, UErrorCode *status) |
Open a UCollator for comparing strings. More... | |
U_CAPI void | ucol_close (UCollator *coll) |
Close a UCollator. More... | |
U_CAPI UCollationResult | ucol_strcoll ( const UCollator *coll, const UChar *source, int32_t sourceLength, const UChar *target, int32_t targetLength) |
Compare two strings. More... | |
U_CAPI UBool | ucol_greater ( const UCollator *coll, const UChar *source, int32_t sourceLength, const UChar *target, int32_t targetLength) |
Determine if one string is greater than another. More... | |
U_CAPI UBool | ucol_greaterOrEqual ( const UCollator *coll, const UChar *source, int32_t sourceLength, const UChar *target, int32_t targetLength) |
Determine if one string is greater than or equal to another. More... | |
U_CAPI UBool | ucol_equal ( const UCollator *coll, const UChar *source, int32_t sourceLength, const UChar *target, int32_t targetLength) |
Compare two strings for equality. More... | |
U_CAPI UCollationStrength | ucol_getStrength (const UCollator *coll) |
Get the collation strength used in a UCollator. More... | |
U_CAPI void | ucol_setStrength ( UCollator *coll, UCollationStrength strength) |
Set the collation strength used in a UCollator. More... | |
U_CAPI UNormalizationMode | ucol_getNormalization (const UCollator* coll) |
Get the normalization mode used in a UCollator. More... | |
U_CAPI void | ucol_setNormalization ( UCollator *coll, UNormalizationMode mode) |
Set the normalization mode used in a UCollator. More... | |
U_CAPI int32_t | ucol_getDisplayName ( const char *objLoc, const char *dispLoc, UChar *result, int32_t resultLength, UErrorCode *status) |
Get the display name for a UCollator. More... | |
U_CAPI const char* | ucol_getAvailable (int32_t index) |
Get a locale for which collation rules are available. More... | |
U_CAPI int32_t | ucol_countAvailable (void) |
Determine how many locales have collation rules available. More... | |
U_CAPI const UChar* | ucol_getRules ( const UCollator *coll, int32_t *length) |
Get the collation rules from a UCollator. More... | |
U_CAPI int32_t | ucol_getSortKey (const UCollator *coll, const UChar *source, int32_t sourceLength, uint8_t *result, int32_t resultLength) |
Get a sort key for a string from a UCollator. More... | |
U_CAPI void U_EXPORT2 | ucol_getVersion (const UCollator* coll, UVersionInfo info) |
Gets the version information for a Collator. More... | |
U_CAPI void | ucol_setAttribute (UCollator *coll, UColAttribute attr, UColAttributeValue value, UErrorCode *status) |
Universal attribute setter. More... | |
U_CAPI UColAttributeValue | ucol_getAttribute (const UCollator *coll, UColAttribute attr, UErrorCode *status) |
Universal attribute getter. More... | |
U_CAPI UCollator* | ucol_safeClone ( const UCollator *coll, void *stackBuffer, int32_t *pBufferSize, UErrorCode *status) |
Thread safe cloning operation. More... | |
U_CDECL_BEGIN typedef UChar | UCharForwardIterator (void *context) |
U_CDECL_END U_CAPI UCollationResult | ucol_strcollinc (const UCollator *coll, UCharForwardIterator *source, void *sourceContext, UCharForwardIterator *target, void *targetContext) |
String compare that uses user supplied character iteration. More... | |
U_CAPI int32_t | ucol_getRulesEx (const UCollator *coll, UColRuleOption delta, UChar *buffer, int32_t bufferLen) |
Returns current rules. More... |
The C API for Collator performs locale-sensitive string comparison. You use this class to build searching and sorting routines for natural language text. Important: The ICU collation implementation is being reworked. This means that collation results and especially sort keys will change from ICU 1.6 to 1.7 and again to 1.8. For details, see the collation design document.
Like other locale-sensitive classes, you can use the function ucol_open()
to obtain the appropriate pointer to a UCollator
object for a given locale. You only need to look at the collation rules (see ucol_getRules() and ucol_openRules()) if you need to understand the details of a particular collation strategy or if you need to modify that strategy.
The following example shows how to compare two strings using the UCollator
for the default locale.
// Compare two strings in the default locale
UErrorCode success = U_ZERO_ERROR;
UCollator* myCollator = ucol_open(NULL, &success);
UChar source[4], target[4];
u_uastrcpy(source, "abc");
u_uastrcpy(target, "ABC");
if( ucol_strcoll(myCollator, source, u_strlen(source), target, u_strlen(target)) == UCOL_LESS) {
    printf("abc is less than ABC\n");
}else{
    printf("abc is greater than or equal to ABC\n");
}
You can set a Collator
's strength property to determine the level of difference considered significant in comparisons. Four strengths are provided: UCOL_PRIMARY
, UCOL_SECONDARY
, UCOL_TERTIARY
, and UCOL_IDENTICAL
. The exact assignment of strengths to language features is locale dependent. For example, in Czech, "e" and "f" are considered primary differences, while "e" and "\u00EA" are secondary differences, "e" and "E" are tertiary differences, and "e" and "e" are identical. The following shows how both case and accents could be ignored for US English.
// Get the Collator for US English and set its strength to UCOL_PRIMARY
UErrorCode success = U_ZERO_ERROR;
UCollator* usCollator = ucol_open("en_US", &success);
ucol_setStrength(usCollator, UCOL_PRIMARY);
UChar source[4], target[4];
u_uastrcpy(source, "abc");
u_uastrcpy(target, "ABC");
if( ucol_strcoll(usCollator, source, u_strlen(source), target, u_strlen(target)) == UCOL_EQUAL) {
    printf("'abc' and 'ABC' strings are equivalent with strength UCOL_PRIMARY\n");
}
For comparing strings exactly once, the ucol_strcoll
function provides the best performance. When sorting a list of strings however, it is generally necessary to compare each string multiple times. In this case, sort keys provide better performance. The ucol_getSortKey
function converts a string to a series of bytes that can be compared bitwise against other sort keys using strcmp()
. Sort keys are written as zero-terminated byte strings. They consist of several substrings, one for each collation strength level, that are delimited by 0x01 bytes. If the string code points are appended for UCOL_IDENTICAL, then they are processed for correct code point order comparison and may contain 0x01 bytes but not zero bytes.
Note: UCollator
s with different Locale, Collation Strength and Decomposition Mode settings will return different sort orders for the same set of strings. Locales have specific collation rules, and the way in which secondary and tertiary differences are taken into account, for example, will result in a different sorting order for the same strings.
Definition in file ucol.h.
|
|
|
Possible collation strengths - all under UColAttributeValue.
|
|
|
|
|
|
|
|
|
|
|
|
|
Possible values for a comparison result.
|
|
|
|
Close a UCollator. Once closed, a UCollator should not be used.
Referenced by RuleBasedCollator::setUCollator(). |
|
Determine how many locales have collation rules available. This function is most useful for determining the loop ending condition for calls to ucol_getAvailable().
|
|
Compare two strings for equality. This function is equivalent to ucol_strcoll() == UCOL_EQUAL.
|
|
Universal attribute getter.
|
|
Get a locale for which collation rules are available. A UCollator in a locale returned by this function will perform the correct collation for the locale.
|
|
Get the display name for a UCollator. The display name is suitable for presentation to a user.
|
|
Get the normalization mode used in a UCollator. The normalization mode influences how strings are compared.
|
|
Get the collation rules from a UCollator. The rules will follow the rule syntax.
|
|
Returns current rules. Delta defines whether full rules are returned or just the tailoring. Returns number of UChars needed to store rules. If buffer is NULL or bufferLen is not enough to store rules, will store up to available space.
|
|
Get a sort key for a string from a UCollator.
Sort keys may be compared using strcmp().
|
|
Get the collation strength used in a UCollator. The strength influences how strings are compared.
|
|
Gets the version information for a Collator.
|
|
Determine if one string is greater than another. This function is equivalent to ucol_strcoll() == UCOL_GREATER.
|
|
Determine if one string is greater than or equal to another. This function is equivalent to ucol_strcoll() != UCOL_LESS.
|
|
Open a UCollator for comparing strings. The UCollator may be used in calls to ucol_strcoll().
Referenced by RuleBasedCollator::setUCollator(). |
|
Open a UCollator for comparing strings. The UCollator may be used in calls to ucol_strcoll().
|
|
Open a UCollator with a specific version. This is the same as ucol_open() except that ucol_getVersion() of the returned object is guaranteed to be the same as the version parameter. This is designed to be used to open the same collator for a given locale even when ICU is updated. The same locale and version guarantees the same sort keys and comparison results.
|
|
Thread safe cloning operation.
|
|
Universal attribute setter.
|
|
Set the normalization mode used in a UCollator. The normalization mode influences how strings are compared.
|
|
Set the collation strength used in a UCollator. The strength influences how strings are compared. Example of use:
    UCollationResult result;
    UChar source[4], target[4];
    UErrorCode status = U_ZERO_ERROR;
    UCollator *myCollation = ucol_open("en_US", &status);
    if (U_FAILURE(status)) return;
    ucol_setStrength(myCollation, UCOL_PRIMARY);
    u_uastrcpy(source, "abc");
    u_uastrcpy(target, "ABC");
    // result will be "abc" == "ABC"
    // tertiary differences will be ignored
    result = ucol_strcoll(myCollation, source, u_strlen(source), target, u_strlen(target));
|
|
Compare two strings. The strings will be compared using the normalization mode and options specified in ucol_open() or ucol_openRules().
|
|
String compare that uses user supplied character iteration. The idea is to prevent users from having to convert the whole string into UChars before comparing, since strings often differ within the first few characters.
|